[PATCH] i386: Allow a kernel not to be in ring 0
We allow for the fact that the guest kernel may not run in ring 0. This requires some abstraction in a few places when setting %cs or checking the privilege level (user vs. kernel).

This is Chris' [RFC PATCH 15/33] "move segment checks to subarch", except that rather than using #define USER_MODE_MASK, which depends on a config option, we use Zach's more flexible approach of assuming ring 3 == userspace. I also used "get_kernel_rpl()" over "get_kernel_cs()" because I think it reads better in the code...

1) Remove the hardcoded 3 and introduce #define SEGMENT_RPL_MASK 3.
2) Add a get_kernel_rpl() macro, and don't assume it's zero.

And: clean-up of the patch for letting the kernel run in a ring other than ring 0:

a. Add some comments about the SEGMENT_IS_*_CODE() macros.
b. Add a USER_RPL macro. (The code was comparing a value to a mask in some places and to the magic number 3 in other places.)
c. Add macros for the table indicator field and use them.
d. Change the entry.S tests for an LDT stack segment to use the macros.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
This commit is contained in:
Родитель
0da5db3133
Коммит
78be3706b2
|
@ -240,8 +240,9 @@ ret_from_intr:
|
||||||
check_userspace:
|
check_userspace:
|
||||||
movl EFLAGS(%esp), %eax # mix EFLAGS and CS
|
movl EFLAGS(%esp), %eax # mix EFLAGS and CS
|
||||||
movb CS(%esp), %al
|
movb CS(%esp), %al
|
||||||
testl $(VM_MASK | 3), %eax
|
andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
|
||||||
jz resume_kernel
|
cmpl $USER_RPL, %eax
|
||||||
|
jb resume_kernel # not returning to v8086 or userspace
|
||||||
ENTRY(resume_userspace)
|
ENTRY(resume_userspace)
|
||||||
DISABLE_INTERRUPTS # make sure we don't miss an interrupt
|
DISABLE_INTERRUPTS # make sure we don't miss an interrupt
|
||||||
# setting need_resched or sigpending
|
# setting need_resched or sigpending
|
||||||
|
@ -377,8 +378,8 @@ restore_all:
|
||||||
# See comments in process.c:copy_thread() for details.
|
# See comments in process.c:copy_thread() for details.
|
||||||
movb OLDSS(%esp), %ah
|
movb OLDSS(%esp), %ah
|
||||||
movb CS(%esp), %al
|
movb CS(%esp), %al
|
||||||
andl $(VM_MASK | (4 << 8) | 3), %eax
|
andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
|
||||||
cmpl $((4 << 8) | 3), %eax
|
cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
|
||||||
CFI_REMEMBER_STATE
|
CFI_REMEMBER_STATE
|
||||||
je ldt_ss # returning to user-space with LDT SS
|
je ldt_ss # returning to user-space with LDT SS
|
||||||
restore_nocheck:
|
restore_nocheck:
|
||||||
|
|
|
@ -338,7 +338,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
|
||||||
regs.xes = __USER_DS;
|
regs.xes = __USER_DS;
|
||||||
regs.orig_eax = -1;
|
regs.orig_eax = -1;
|
||||||
regs.eip = (unsigned long) kernel_thread_helper;
|
regs.eip = (unsigned long) kernel_thread_helper;
|
||||||
regs.xcs = __KERNEL_CS;
|
regs.xcs = __KERNEL_CS | get_kernel_rpl();
|
||||||
regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
|
regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
|
||||||
|
|
||||||
/* Ok, create the new process.. */
|
/* Ok, create the new process.. */
|
||||||
|
|
|
@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
|
||||||
const struct exception_table_entry *fixup;
|
const struct exception_table_entry *fixup;
|
||||||
|
|
||||||
#ifdef CONFIG_PNPBIOS
|
#ifdef CONFIG_PNPBIOS
|
||||||
if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3)))
|
if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs)))
|
||||||
{
|
{
|
||||||
extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
|
extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
|
||||||
extern u32 pnp_bios_is_utter_crap;
|
extern u32 pnp_bios_is_utter_crap;
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#include <asm/uaccess.h>
|
#include <asm/uaccess.h>
|
||||||
#include <asm/desc.h>
|
#include <asm/desc.h>
|
||||||
#include <asm/kdebug.h>
|
#include <asm/kdebug.h>
|
||||||
|
#include <asm/segment.h>
|
||||||
|
|
||||||
extern void die(const char *,struct pt_regs *,long);
|
extern void die(const char *,struct pt_regs *,long);
|
||||||
|
|
||||||
|
@ -113,10 +114,10 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The standard kernel/user address space limit. */
|
/* The standard kernel/user address space limit. */
|
||||||
*eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg;
|
*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
|
||||||
|
|
||||||
/* By far the most common cases. */
|
/* By far the most common cases. */
|
||||||
if (likely(seg == __USER_CS || seg == __KERNEL_CS))
|
if (likely(SEGMENT_IS_FLAT_CODE(seg)))
|
||||||
return eip;
|
return eip;
|
||||||
|
|
||||||
/* Check the segment exists, is within the current LDT/GDT size,
|
/* Check the segment exists, is within the current LDT/GDT size,
|
||||||
|
@ -430,11 +431,7 @@ good_area:
|
||||||
write = 0;
|
write = 0;
|
||||||
switch (error_code & 3) {
|
switch (error_code & 3) {
|
||||||
default: /* 3: write, present */
|
default: /* 3: write, present */
|
||||||
#ifdef TEST_VERIFY_AREA
|
/* fall through */
|
||||||
if (regs->cs == KERNEL_CS)
|
|
||||||
printk("WP fault at %08lx\n", regs->eip);
|
|
||||||
#endif
|
|
||||||
/* fall through */
|
|
||||||
case 2: /* write, not present */
|
case 2: /* write, not present */
|
||||||
if (!(vma->vm_flags & VM_WRITE))
|
if (!(vma->vm_flags & VM_WRITE))
|
||||||
goto bad_area;
|
goto bad_area;
|
||||||
|
|
|
@ -60,6 +60,7 @@ struct pt_regs {
|
||||||
#ifdef __KERNEL__
|
#ifdef __KERNEL__
|
||||||
|
|
||||||
#include <asm/vm86.h>
|
#include <asm/vm86.h>
|
||||||
|
#include <asm/segment.h>
|
||||||
|
|
||||||
struct task_struct;
|
struct task_struct;
|
||||||
extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
|
extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
|
||||||
|
@ -73,11 +74,11 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int erro
|
||||||
*/
|
*/
|
||||||
static inline int user_mode(struct pt_regs *regs)
|
static inline int user_mode(struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
return (regs->xcs & 3) != 0;
|
return (regs->xcs & SEGMENT_RPL_MASK) == USER_RPL;
|
||||||
}
|
}
|
||||||
static inline int user_mode_vm(struct pt_regs *regs)
|
static inline int user_mode_vm(struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
return ((regs->xcs & 3) | (regs->eflags & VM_MASK)) != 0;
|
return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL;
|
||||||
}
|
}
|
||||||
#define instruction_pointer(regs) ((regs)->eip)
|
#define instruction_pointer(regs) ((regs)->eip)
|
||||||
extern unsigned long profile_pc(struct pt_regs *regs);
|
extern unsigned long profile_pc(struct pt_regs *regs);
|
||||||
|
|
|
@ -83,6 +83,11 @@
|
||||||
|
|
||||||
#define GDT_SIZE (GDT_ENTRIES * 8)
|
#define GDT_SIZE (GDT_ENTRIES * 8)
|
||||||
|
|
||||||
|
/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
|
||||||
|
#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
|
||||||
|
/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
|
||||||
|
#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
|
||||||
|
|
||||||
/* Simple and small GDT entries for booting only */
|
/* Simple and small GDT entries for booting only */
|
||||||
|
|
||||||
#define GDT_ENTRY_BOOT_CS 2
|
#define GDT_ENTRY_BOOT_CS 2
|
||||||
|
@ -112,4 +117,16 @@
|
||||||
*/
|
*/
|
||||||
#define IDT_ENTRIES 256
|
#define IDT_ENTRIES 256
|
||||||
|
|
||||||
|
/* Bottom two bits of selector give the ring privilege level */
|
||||||
|
#define SEGMENT_RPL_MASK 0x3
|
||||||
|
/* Bit 2 is table indicator (LDT/GDT) */
|
||||||
|
#define SEGMENT_TI_MASK 0x4
|
||||||
|
|
||||||
|
/* User mode is privilege level 3 */
|
||||||
|
#define USER_RPL 0x3
|
||||||
|
/* LDT segment has TI set, GDT has it cleared */
|
||||||
|
#define SEGMENT_LDT 0x4
|
||||||
|
#define SEGMENT_GDT 0x0
|
||||||
|
|
||||||
|
#define get_kernel_rpl() 0
|
||||||
#endif
|
#endif
|
||||||
|
|
Загрузка…
Ссылка в новой задаче