s390: KVM preparation: provide hook to enable pgstes in user pagetable
The SIE instruction on s390 uses the 2nd half of the page table page to virtualize the storage keys of a guest. This patch offers the s390_enable_sie function, which reorganizes the page tables of a single-threaded process to reserve space in the page table: s390_enable_sie makes sure that the process is single threaded and then uses dup_mm to create a new mm with reorganized page tables. The old mm is freed and the process has now a page status extended field after every page table. Code that wants to exploit pgstes should SELECT CONFIG_PGSTE. This patch has a small common code hit, namely making dup_mm non-static. Edit (Carsten): I've modified Martin's patch, following Jeremy Fitzhardinge's review feedback. Now we do have the prototype for dup_mm in include/linux/sched.h. Following Martin's suggestion, s390_enable_sie() does now call task_lock() to prevent race against ptrace modification of mm_users. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Acked-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Avi Kivity <avi@qumranet.com>
This commit is contained in:
Parent
37817f2982
Commit
402b08622d
|
@ -62,6 +62,10 @@ config GENERIC_LOCKBREAK
|
||||||
default y
|
default y
|
||||||
depends on SMP && PREEMPT
|
depends on SMP && PREEMPT
|
||||||
|
|
||||||
|
config PGSTE
|
||||||
|
bool
|
||||||
|
default y if KVM
|
||||||
|
|
||||||
mainmenu "Linux Kernel Configuration"
|
mainmenu "Linux Kernel Configuration"
|
||||||
|
|
||||||
config S390
|
config S390
|
||||||
|
|
|
@ -316,7 +316,11 @@ static int __init early_parse_ipldelay(char *p)
|
||||||
early_param("ipldelay", early_parse_ipldelay);
|
early_param("ipldelay", early_parse_ipldelay);
|
||||||
|
|
||||||
#ifdef CONFIG_S390_SWITCH_AMODE
|
#ifdef CONFIG_S390_SWITCH_AMODE
|
||||||
|
#ifdef CONFIG_PGSTE
|
||||||
|
unsigned int switch_amode = 1;
|
||||||
|
#else
|
||||||
unsigned int switch_amode = 0;
|
unsigned int switch_amode = 0;
|
||||||
|
#endif
|
||||||
EXPORT_SYMBOL_GPL(switch_amode);
|
EXPORT_SYMBOL_GPL(switch_amode);
|
||||||
|
|
||||||
static void set_amode_and_uaccess(unsigned long user_amode,
|
static void set_amode_and_uaccess(unsigned long user_amode,
|
||||||
|
|
|
@ -30,11 +30,27 @@
|
||||||
#define TABLES_PER_PAGE 4
|
#define TABLES_PER_PAGE 4
|
||||||
#define FRAG_MASK 15UL
|
#define FRAG_MASK 15UL
|
||||||
#define SECOND_HALVES 10UL
|
#define SECOND_HALVES 10UL
|
||||||
|
|
||||||
|
void clear_table_pgstes(unsigned long *table)
|
||||||
|
{
|
||||||
|
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
|
||||||
|
memset(table + 256, 0, PAGE_SIZE/4);
|
||||||
|
clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
|
||||||
|
memset(table + 768, 0, PAGE_SIZE/4);
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define ALLOC_ORDER 2
|
#define ALLOC_ORDER 2
|
||||||
#define TABLES_PER_PAGE 2
|
#define TABLES_PER_PAGE 2
|
||||||
#define FRAG_MASK 3UL
|
#define FRAG_MASK 3UL
|
||||||
#define SECOND_HALVES 2UL
|
#define SECOND_HALVES 2UL
|
||||||
|
|
||||||
|
void clear_table_pgstes(unsigned long *table)
|
||||||
|
{
|
||||||
|
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
|
||||||
|
memset(table + 256, 0, PAGE_SIZE/2);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
|
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
|
||||||
|
@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
|
||||||
unsigned long *table;
|
unsigned long *table;
|
||||||
unsigned long bits;
|
unsigned long bits;
|
||||||
|
|
||||||
bits = mm->context.noexec ? 3UL : 1UL;
|
bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
|
||||||
spin_lock(&mm->page_table_lock);
|
spin_lock(&mm->page_table_lock);
|
||||||
page = NULL;
|
page = NULL;
|
||||||
if (!list_empty(&mm->context.pgtable_list)) {
|
if (!list_empty(&mm->context.pgtable_list)) {
|
||||||
|
@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
|
||||||
pgtable_page_ctor(page);
|
pgtable_page_ctor(page);
|
||||||
page->flags &= ~FRAG_MASK;
|
page->flags &= ~FRAG_MASK;
|
||||||
table = (unsigned long *) page_to_phys(page);
|
table = (unsigned long *) page_to_phys(page);
|
||||||
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
|
if (mm->context.pgstes)
|
||||||
|
clear_table_pgstes(table);
|
||||||
|
else
|
||||||
|
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
|
||||||
spin_lock(&mm->page_table_lock);
|
spin_lock(&mm->page_table_lock);
|
||||||
list_add(&page->lru, &mm->context.pgtable_list);
|
list_add(&page->lru, &mm->context.pgtable_list);
|
||||||
}
|
}
|
||||||
|
@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
|
||||||
struct page *page;
|
struct page *page;
|
||||||
unsigned long bits;
|
unsigned long bits;
|
||||||
|
|
||||||
bits = mm->context.noexec ? 3UL : 1UL;
|
bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
|
||||||
bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
|
bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
|
||||||
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
|
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
|
||||||
spin_lock(&mm->page_table_lock);
|
spin_lock(&mm->page_table_lock);
|
||||||
|
@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
|
||||||
mm->context.noexec = 0;
|
mm->context.noexec = 0;
|
||||||
update_mm(mm, tsk);
|
update_mm(mm, tsk);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* switch on pgstes for its userspace process (for kvm)
|
||||||
|
*/
|
||||||
|
int s390_enable_sie(void)
|
||||||
|
{
|
||||||
|
struct task_struct *tsk = current;
|
||||||
|
struct mm_struct *mm;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
task_lock(tsk);
|
||||||
|
|
||||||
|
rc = 0;
|
||||||
|
if (tsk->mm->context.pgstes)
|
||||||
|
goto unlock;
|
||||||
|
|
||||||
|
rc = -EINVAL;
|
||||||
|
if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
|
||||||
|
tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
|
||||||
|
goto unlock;
|
||||||
|
|
||||||
|
tsk->mm->context.pgstes = 1; /* dirty little tricks .. */
|
||||||
|
mm = dup_mm(tsk);
|
||||||
|
tsk->mm->context.pgstes = 0;
|
||||||
|
|
||||||
|
rc = -ENOMEM;
|
||||||
|
if (!mm)
|
||||||
|
goto unlock;
|
||||||
|
mmput(tsk->mm);
|
||||||
|
tsk->mm = tsk->active_mm = mm;
|
||||||
|
preempt_disable();
|
||||||
|
update_mm(mm, tsk);
|
||||||
|
cpu_set(smp_processor_id(), mm->cpu_vm_mask);
|
||||||
|
preempt_enable();
|
||||||
|
rc = 0;
|
||||||
|
unlock:
|
||||||
|
task_unlock(tsk);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(s390_enable_sie);
|
||||||
|
|
|
@ -7,6 +7,7 @@ typedef struct {
|
||||||
unsigned long asce_bits;
|
unsigned long asce_bits;
|
||||||
unsigned long asce_limit;
|
unsigned long asce_limit;
|
||||||
int noexec;
|
int noexec;
|
||||||
|
int pgstes;
|
||||||
} mm_context_t;
|
} mm_context_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -20,7 +20,13 @@ static inline int init_new_context(struct task_struct *tsk,
|
||||||
#ifdef CONFIG_64BIT
|
#ifdef CONFIG_64BIT
|
||||||
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
|
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
|
||||||
#endif
|
#endif
|
||||||
mm->context.noexec = s390_noexec;
|
if (current->mm->context.pgstes) {
|
||||||
|
mm->context.noexec = 0;
|
||||||
|
mm->context.pgstes = 1;
|
||||||
|
} else {
|
||||||
|
mm->context.noexec = s390_noexec;
|
||||||
|
mm->context.pgstes = 0;
|
||||||
|
}
|
||||||
mm->context.asce_limit = STACK_TOP_MAX;
|
mm->context.asce_limit = STACK_TOP_MAX;
|
||||||
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
|
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -966,6 +966,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
|
||||||
|
|
||||||
extern int add_shared_memory(unsigned long start, unsigned long size);
|
extern int add_shared_memory(unsigned long start, unsigned long size);
|
||||||
extern int remove_shared_memory(unsigned long start, unsigned long size);
|
extern int remove_shared_memory(unsigned long start, unsigned long size);
|
||||||
|
extern int s390_enable_sie(void);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* No page table caches to initialise
|
* No page table caches to initialise
|
||||||
|
|
|
@ -1798,6 +1798,8 @@ extern void mmput(struct mm_struct *);
|
||||||
extern struct mm_struct *get_task_mm(struct task_struct *task);
|
extern struct mm_struct *get_task_mm(struct task_struct *task);
|
||||||
/* Remove the current tasks stale references to the old mm_struct */
|
/* Remove the current tasks stale references to the old mm_struct */
|
||||||
extern void mm_release(struct task_struct *, struct mm_struct *);
|
extern void mm_release(struct task_struct *, struct mm_struct *);
|
||||||
|
/* Allocate a new mm structure and copy contents from tsk->mm */
|
||||||
|
extern struct mm_struct *dup_mm(struct task_struct *tsk);
|
||||||
|
|
||||||
extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
|
extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
|
||||||
extern void flush_thread(void);
|
extern void flush_thread(void);
|
||||||
|
|
|
@ -521,7 +521,7 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
|
||||||
* Allocate a new mm structure and copy contents from the
|
* Allocate a new mm structure and copy contents from the
|
||||||
* mm structure of the passed in task structure.
|
* mm structure of the passed in task structure.
|
||||||
*/
|
*/
|
||||||
static struct mm_struct *dup_mm(struct task_struct *tsk)
|
struct mm_struct *dup_mm(struct task_struct *tsk)
|
||||||
{
|
{
|
||||||
struct mm_struct *mm, *oldmm = current->mm;
|
struct mm_struct *mm, *oldmm = current->mm;
|
||||||
int err;
|
int err;
|
||||||
|
|
Loading…
Reference in new issue