Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton: "14 fixes" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: mm: revert x86_64 and arm64 ELF_ET_DYN_BASE base changes mm/vmalloc.c: don't unconditionally use __GFP_HIGHMEM mm/mempolicy: fix use after free when calling get_mempolicy mm/cma_debug.c: fix stack corruption due to sprintf usage signal: don't remove SIGNAL_UNKILLABLE for traced tasks. mm, oom: fix potential data corruption when oom_reaper races with writer mm: fix double mmap_sem unlock on MMF_UNSTABLE enforced SIGBUS slub: fix per memcg cache leak on css offline mm: discard memblock data later test_kmod: fix description for -s and -c parameters kmod: fix wait on recursive loop wait: add wait_event_killable_timeout() kernel/watchdog: fix Kconfig constraints for perf hardlockup watchdog mm: memcontrol: fix NULL pointer crash in test_clear_page_writeback()
This commit is contained in:
Коммит
58d4e450a4
|
@ -114,10 +114,10 @@
|
|||
|
||||
/*
|
||||
* This is the base location for PIE (ET_DYN with INTERP) loads. On
|
||||
* 64-bit, this is raised to 4GB to leave the entire 32-bit address
|
||||
* 64-bit, this is above 4GB to leave the entire 32-bit address
|
||||
* space open for things that want to use the area for 32-bit pointers.
|
||||
*/
|
||||
#define ELF_ET_DYN_BASE 0x100000000UL
|
||||
#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
|
|
|
@ -199,7 +199,7 @@ config PPC
|
|||
select HAVE_OPTPROBES if PPC64
|
||||
select HAVE_PERF_EVENTS
|
||||
select HAVE_PERF_EVENTS_NMI if PPC64
|
||||
select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
|
||||
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
|
||||
select HAVE_PERF_REGS
|
||||
select HAVE_PERF_USER_STACK_DUMP
|
||||
select HAVE_RCU_TABLE_FREE if SMP
|
||||
|
|
|
@ -163,7 +163,7 @@ config X86
|
|||
select HAVE_PCSPKR_PLATFORM
|
||||
select HAVE_PERF_EVENTS
|
||||
select HAVE_PERF_EVENTS_NMI
|
||||
select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI
|
||||
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
|
||||
select HAVE_PERF_REGS
|
||||
select HAVE_PERF_USER_STACK_DUMP
|
||||
select HAVE_REGS_AND_STACK_ACCESS_API
|
||||
|
|
|
@ -247,11 +247,11 @@ extern int force_personality32;
|
|||
|
||||
/*
|
||||
* This is the base location for PIE (ET_DYN with INTERP) loads. On
|
||||
* 64-bit, this is raised to 4GB to leave the entire 32-bit address
|
||||
* 64-bit, this is above 4GB to leave the entire 32-bit address
|
||||
* space open for things that want to use the area for 32-bit pointers.
|
||||
*/
|
||||
#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \
|
||||
0x100000000UL)
|
||||
(TASK_SIZE / 3 * 2))
|
||||
|
||||
/* This yields a mask that user programs can use to figure out what
|
||||
instruction set this CPU supports. This could be done in user space,
|
||||
|
|
|
@ -61,6 +61,7 @@ extern int memblock_debug;
|
|||
#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
|
||||
#define __init_memblock __meminit
|
||||
#define __initdata_memblock __meminitdata
|
||||
void memblock_discard(void);
|
||||
#else
|
||||
#define __init_memblock
|
||||
#define __initdata_memblock
|
||||
|
@ -74,8 +75,6 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
|
|||
int nid, ulong flags);
|
||||
phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
|
||||
phys_addr_t size, phys_addr_t align);
|
||||
phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
|
||||
phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr);
|
||||
void memblock_allow_resize(void);
|
||||
int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
|
||||
int memblock_add(phys_addr_t base, phys_addr_t size);
|
||||
|
@ -110,6 +109,9 @@ void __next_mem_range_rev(u64 *idx, int nid, ulong flags,
|
|||
void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start,
|
||||
phys_addr_t *out_end);
|
||||
|
||||
void __memblock_free_early(phys_addr_t base, phys_addr_t size);
|
||||
void __memblock_free_late(phys_addr_t base, phys_addr_t size);
|
||||
|
||||
/**
|
||||
* for_each_mem_range - iterate through memblock areas from type_a and not
|
||||
* included in type_b. Or just type_a if type_b is NULL.
|
||||
|
|
|
@ -484,7 +484,8 @@ bool mem_cgroup_oom_synchronize(bool wait);
|
|||
extern int do_swap_account;
|
||||
#endif
|
||||
|
||||
void lock_page_memcg(struct page *page);
|
||||
struct mem_cgroup *lock_page_memcg(struct page *page);
|
||||
void __unlock_page_memcg(struct mem_cgroup *memcg);
|
||||
void unlock_page_memcg(struct page *page);
|
||||
|
||||
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
|
||||
|
@ -809,7 +810,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
|
|||
{
|
||||
}
|
||||
|
||||
static inline void lock_page_memcg(struct page *page)
|
||||
static inline struct mem_cgroup *lock_page_memcg(struct page *page)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void __unlock_page_memcg(struct mem_cgroup *memcg)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#include <linux/types.h>
|
||||
#include <linux/nodemask.h>
|
||||
#include <uapi/linux/oom.h>
|
||||
#include <linux/sched/coredump.h> /* MMF_* */
|
||||
#include <linux/mm.h> /* VM_FAULT* */
|
||||
|
||||
struct zonelist;
|
||||
struct notifier_block;
|
||||
|
@ -63,6 +65,26 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk)
|
|||
return tsk->signal->oom_mm;
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks whether a page fault on the given mm is still reliable.
|
||||
* This is no longer true if the oom reaper started to reap the
|
||||
* address space which is reflected by MMF_UNSTABLE flag set in
|
||||
* the mm. At that moment any !shared mapping would lose the content
|
||||
* and could cause a memory corruption (zero pages instead of the
|
||||
* original content).
|
||||
*
|
||||
* User should call this before establishing a page table entry for
|
||||
* a !shared mapping and under the proper page table lock.
|
||||
*
|
||||
* Return 0 when the PF is safe, VM_FAULT_SIGBUS otherwise.
|
||||
*/
|
||||
static inline int check_stable_address_space(struct mm_struct *mm)
|
||||
{
|
||||
if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags)))
|
||||
return VM_FAULT_SIGBUS;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern unsigned long oom_badness(struct task_struct *p,
|
||||
struct mem_cgroup *memcg, const nodemask_t *nodemask,
|
||||
unsigned long totalpages);
|
||||
|
|
|
@ -757,6 +757,43 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
|
|||
__ret; \
|
||||
})
|
||||
|
||||
#define __wait_event_killable_timeout(wq_head, condition, timeout) \
|
||||
___wait_event(wq_head, ___wait_cond_timeout(condition), \
|
||||
TASK_KILLABLE, 0, timeout, \
|
||||
__ret = schedule_timeout(__ret))
|
||||
|
||||
/**
|
||||
* wait_event_killable_timeout - sleep until a condition gets true or a timeout elapses
|
||||
* @wq_head: the waitqueue to wait on
|
||||
* @condition: a C expression for the event to wait for
|
||||
* @timeout: timeout, in jiffies
|
||||
*
|
||||
* The process is put to sleep (TASK_KILLABLE) until the
|
||||
* @condition evaluates to true or a kill signal is received.
|
||||
* The @condition is checked each time the waitqueue @wq_head is woken up.
|
||||
*
|
||||
* wake_up() has to be called after changing any variable that could
|
||||
* change the result of the wait condition.
|
||||
*
|
||||
* Returns:
|
||||
* 0 if the @condition evaluated to %false after the @timeout elapsed,
|
||||
* 1 if the @condition evaluated to %true after the @timeout elapsed,
|
||||
* the remaining jiffies (at least 1) if the @condition evaluated
|
||||
* to %true before the @timeout elapsed, or -%ERESTARTSYS if it was
|
||||
* interrupted by a kill signal.
|
||||
*
|
||||
* Only kill signals interrupt this process.
|
||||
*/
|
||||
#define wait_event_killable_timeout(wq_head, condition, timeout) \
|
||||
({ \
|
||||
long __ret = timeout; \
|
||||
might_sleep(); \
|
||||
if (!___wait_cond_timeout(condition)) \
|
||||
__ret = __wait_event_killable_timeout(wq_head, \
|
||||
condition, timeout); \
|
||||
__ret; \
|
||||
})
|
||||
|
||||
|
||||
#define __wait_event_lock_irq(wq_head, condition, lock, cmd) \
|
||||
(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
|
||||
|
|
|
@ -70,6 +70,18 @@ static DECLARE_RWSEM(umhelper_sem);
|
|||
static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
|
||||
static DECLARE_WAIT_QUEUE_HEAD(kmod_wq);
|
||||
|
||||
/*
|
||||
* This is a restriction on having *all* MAX_KMOD_CONCURRENT threads
|
||||
* running at the same time without returning. When this happens we
|
||||
* believe you've somehow ended up with a recursive module dependency
|
||||
* creating a loop.
|
||||
*
|
||||
* We have no option but to fail.
|
||||
*
|
||||
* Userspace should proactively try to detect and prevent these.
|
||||
*/
|
||||
#define MAX_KMOD_ALL_BUSY_TIMEOUT 5
|
||||
|
||||
/*
|
||||
modprobe_path is set via /proc/sys.
|
||||
*/
|
||||
|
@ -167,8 +179,17 @@ int __request_module(bool wait, const char *fmt, ...)
|
|||
pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...",
|
||||
atomic_read(&kmod_concurrent_max),
|
||||
MAX_KMOD_CONCURRENT, module_name);
|
||||
wait_event_interruptible(kmod_wq,
|
||||
atomic_dec_if_positive(&kmod_concurrent_max) >= 0);
|
||||
ret = wait_event_killable_timeout(kmod_wq,
|
||||
atomic_dec_if_positive(&kmod_concurrent_max) >= 0,
|
||||
MAX_KMOD_ALL_BUSY_TIMEOUT * HZ);
|
||||
if (!ret) {
|
||||
pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now",
|
||||
module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT);
|
||||
return -ETIME;
|
||||
} else if (ret == -ERESTARTSYS) {
|
||||
pr_warn_ratelimited("request_module: sigkill sent for modprobe %s, giving up", module_name);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
trace_module_request(module_name, wait, _RET_IP_);
|
||||
|
|
|
@ -1194,7 +1194,11 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
|
|||
recalc_sigpending_and_wake(t);
|
||||
}
|
||||
}
|
||||
if (action->sa.sa_handler == SIG_DFL)
|
||||
/*
|
||||
* Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect
|
||||
* debugging to leave init killable.
|
||||
*/
|
||||
if (action->sa.sa_handler == SIG_DFL && !t->ptrace)
|
||||
t->signal->flags &= ~SIGNAL_UNKILLABLE;
|
||||
ret = specific_send_sig_info(sig, info, t);
|
||||
spin_unlock_irqrestore(&t->sighand->siglock, flags);
|
||||
|
|
|
@ -167,7 +167,7 @@ static void cma_debugfs_add_one(struct cma *cma, int idx)
|
|||
char name[16];
|
||||
int u32s;
|
||||
|
||||
sprintf(name, "cma-%s", cma->name);
|
||||
scnprintf(name, sizeof(name), "cma-%s", cma->name);
|
||||
|
||||
tmp = debugfs_create_dir(name, cma_debugfs_root);
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#include <linux/userfaultfd_k.h>
|
||||
#include <linux/page_idle.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <linux/oom.h>
|
||||
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/pgalloc.h>
|
||||
|
@ -550,6 +551,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
|
|||
struct mem_cgroup *memcg;
|
||||
pgtable_t pgtable;
|
||||
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
|
||||
int ret = 0;
|
||||
|
||||
VM_BUG_ON_PAGE(!PageCompound(page), page);
|
||||
|
||||
|
@ -561,9 +563,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
|
|||
|
||||
pgtable = pte_alloc_one(vma->vm_mm, haddr);
|
||||
if (unlikely(!pgtable)) {
|
||||
mem_cgroup_cancel_charge(page, memcg, true);
|
||||
put_page(page);
|
||||
return VM_FAULT_OOM;
|
||||
ret = VM_FAULT_OOM;
|
||||
goto release;
|
||||
}
|
||||
|
||||
clear_huge_page(page, haddr, HPAGE_PMD_NR);
|
||||
|
@ -576,13 +577,14 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
|
|||
|
||||
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
|
||||
if (unlikely(!pmd_none(*vmf->pmd))) {
|
||||
spin_unlock(vmf->ptl);
|
||||
mem_cgroup_cancel_charge(page, memcg, true);
|
||||
put_page(page);
|
||||
pte_free(vma->vm_mm, pgtable);
|
||||
goto unlock_release;
|
||||
} else {
|
||||
pmd_t entry;
|
||||
|
||||
ret = check_stable_address_space(vma->vm_mm);
|
||||
if (ret)
|
||||
goto unlock_release;
|
||||
|
||||
/* Deliver the page fault to userland */
|
||||
if (userfaultfd_missing(vma)) {
|
||||
int ret;
|
||||
|
@ -610,6 +612,15 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
|
|||
}
|
||||
|
||||
return 0;
|
||||
unlock_release:
|
||||
spin_unlock(vmf->ptl);
|
||||
release:
|
||||
if (pgtable)
|
||||
pte_free(vma->vm_mm, pgtable);
|
||||
mem_cgroup_cancel_charge(page, memcg, true);
|
||||
put_page(page);
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -688,7 +699,10 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
|
|||
ret = 0;
|
||||
set = false;
|
||||
if (pmd_none(*vmf->pmd)) {
|
||||
if (userfaultfd_missing(vma)) {
|
||||
ret = check_stable_address_space(vma->vm_mm);
|
||||
if (ret) {
|
||||
spin_unlock(vmf->ptl);
|
||||
} else if (userfaultfd_missing(vma)) {
|
||||
spin_unlock(vmf->ptl);
|
||||
ret = handle_userfault(vmf, VM_UFFD_MISSING);
|
||||
VM_BUG_ON(ret & VM_FAULT_FALLBACK);
|
||||
|
|
|
@ -285,31 +285,27 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
|
|||
}
|
||||
|
||||
#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
|
||||
|
||||
phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
|
||||
phys_addr_t *addr)
|
||||
/**
|
||||
* Discard memory and reserved arrays if they were allocated
|
||||
*/
|
||||
void __init memblock_discard(void)
|
||||
{
|
||||
if (memblock.reserved.regions == memblock_reserved_init_regions)
|
||||
return 0;
|
||||
phys_addr_t addr, size;
|
||||
|
||||
*addr = __pa(memblock.reserved.regions);
|
||||
if (memblock.reserved.regions != memblock_reserved_init_regions) {
|
||||
addr = __pa(memblock.reserved.regions);
|
||||
size = PAGE_ALIGN(sizeof(struct memblock_region) *
|
||||
memblock.reserved.max);
|
||||
__memblock_free_late(addr, size);
|
||||
}
|
||||
|
||||
return PAGE_ALIGN(sizeof(struct memblock_region) *
|
||||
memblock.reserved.max);
|
||||
if (memblock.memory.regions == memblock_memory_init_regions) {
|
||||
addr = __pa(memblock.memory.regions);
|
||||
size = PAGE_ALIGN(sizeof(struct memblock_region) *
|
||||
memblock.memory.max);
|
||||
__memblock_free_late(addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info(
|
||||
phys_addr_t *addr)
|
||||
{
|
||||
if (memblock.memory.regions == memblock_memory_init_regions)
|
||||
return 0;
|
||||
|
||||
*addr = __pa(memblock.memory.regions);
|
||||
|
||||
return PAGE_ALIGN(sizeof(struct memblock_region) *
|
||||
memblock.memory.max);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
|
|
@ -1611,9 +1611,13 @@ cleanup:
|
|||
* @page: the page
|
||||
*
|
||||
* This function protects unlocked LRU pages from being moved to
|
||||
* another cgroup and stabilizes their page->mem_cgroup binding.
|
||||
* another cgroup.
|
||||
*
|
||||
* It ensures lifetime of the returned memcg. Caller is responsible
|
||||
* for the lifetime of the page; __unlock_page_memcg() is available
|
||||
* when @page might get freed inside the locked section.
|
||||
*/
|
||||
void lock_page_memcg(struct page *page)
|
||||
struct mem_cgroup *lock_page_memcg(struct page *page)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
unsigned long flags;
|
||||
|
@ -1622,18 +1626,24 @@ void lock_page_memcg(struct page *page)
|
|||
* The RCU lock is held throughout the transaction. The fast
|
||||
* path can get away without acquiring the memcg->move_lock
|
||||
* because page moving starts with an RCU grace period.
|
||||
*/
|
||||
*
|
||||
* The RCU lock also protects the memcg from being freed when
|
||||
* the page state that is going to change is the only thing
|
||||
* preventing the page itself from being freed. E.g. writeback
|
||||
* doesn't hold a page reference and relies on PG_writeback to
|
||||
* keep off truncation, migration and so forth.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
|
||||
if (mem_cgroup_disabled())
|
||||
return;
|
||||
return NULL;
|
||||
again:
|
||||
memcg = page->mem_cgroup;
|
||||
if (unlikely(!memcg))
|
||||
return;
|
||||
return NULL;
|
||||
|
||||
if (atomic_read(&memcg->moving_account) <= 0)
|
||||
return;
|
||||
return memcg;
|
||||
|
||||
spin_lock_irqsave(&memcg->move_lock, flags);
|
||||
if (memcg != page->mem_cgroup) {
|
||||
|
@ -1649,18 +1659,18 @@ again:
|
|||
memcg->move_lock_task = current;
|
||||
memcg->move_lock_flags = flags;
|
||||
|
||||
return;
|
||||
return memcg;
|
||||
}
|
||||
EXPORT_SYMBOL(lock_page_memcg);
|
||||
|
||||
/**
|
||||
* unlock_page_memcg - unlock a page->mem_cgroup binding
|
||||
* @page: the page
|
||||
* __unlock_page_memcg - unlock and unpin a memcg
|
||||
* @memcg: the memcg
|
||||
*
|
||||
* Unlock and unpin a memcg returned by lock_page_memcg().
|
||||
*/
|
||||
void unlock_page_memcg(struct page *page)
|
||||
void __unlock_page_memcg(struct mem_cgroup *memcg)
|
||||
{
|
||||
struct mem_cgroup *memcg = page->mem_cgroup;
|
||||
|
||||
if (memcg && memcg->move_lock_task == current) {
|
||||
unsigned long flags = memcg->move_lock_flags;
|
||||
|
||||
|
@ -1672,6 +1682,15 @@ void unlock_page_memcg(struct page *page)
|
|||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
* unlock_page_memcg - unlock a page->mem_cgroup binding
|
||||
* @page: the page
|
||||
*/
|
||||
void unlock_page_memcg(struct page *page)
|
||||
{
|
||||
__unlock_page_memcg(page->mem_cgroup);
|
||||
}
|
||||
EXPORT_SYMBOL(unlock_page_memcg);
|
||||
|
||||
/*
|
||||
|
|
36
mm/memory.c
36
mm/memory.c
|
@ -68,6 +68,7 @@
|
|||
#include <linux/debugfs.h>
|
||||
#include <linux/userfaultfd_k.h>
|
||||
#include <linux/dax.h>
|
||||
#include <linux/oom.h>
|
||||
|
||||
#include <asm/io.h>
|
||||
#include <asm/mmu_context.h>
|
||||
|
@ -2893,6 +2894,7 @@ static int do_anonymous_page(struct vm_fault *vmf)
|
|||
struct vm_area_struct *vma = vmf->vma;
|
||||
struct mem_cgroup *memcg;
|
||||
struct page *page;
|
||||
int ret = 0;
|
||||
pte_t entry;
|
||||
|
||||
/* File mapping without ->vm_ops ? */
|
||||
|
@ -2925,6 +2927,9 @@ static int do_anonymous_page(struct vm_fault *vmf)
|
|||
vmf->address, &vmf->ptl);
|
||||
if (!pte_none(*vmf->pte))
|
||||
goto unlock;
|
||||
ret = check_stable_address_space(vma->vm_mm);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
/* Deliver the page fault to userland, check inside PT lock */
|
||||
if (userfaultfd_missing(vma)) {
|
||||
pte_unmap_unlock(vmf->pte, vmf->ptl);
|
||||
|
@ -2959,6 +2964,10 @@ static int do_anonymous_page(struct vm_fault *vmf)
|
|||
if (!pte_none(*vmf->pte))
|
||||
goto release;
|
||||
|
||||
ret = check_stable_address_space(vma->vm_mm);
|
||||
if (ret)
|
||||
goto release;
|
||||
|
||||
/* Deliver the page fault to userland, check inside PT lock */
|
||||
if (userfaultfd_missing(vma)) {
|
||||
pte_unmap_unlock(vmf->pte, vmf->ptl);
|
||||
|
@ -2978,7 +2987,7 @@ setpte:
|
|||
update_mmu_cache(vma, vmf->address, vmf->pte);
|
||||
unlock:
|
||||
pte_unmap_unlock(vmf->pte, vmf->ptl);
|
||||
return 0;
|
||||
return ret;
|
||||
release:
|
||||
mem_cgroup_cancel_charge(page, memcg, false);
|
||||
put_page(page);
|
||||
|
@ -3252,7 +3261,7 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
|
|||
int finish_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct page *page;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
/* Did we COW the page? */
|
||||
if ((vmf->flags & FAULT_FLAG_WRITE) &&
|
||||
|
@ -3260,7 +3269,15 @@ int finish_fault(struct vm_fault *vmf)
|
|||
page = vmf->cow_page;
|
||||
else
|
||||
page = vmf->page;
|
||||
ret = alloc_set_pte(vmf, vmf->memcg, page);
|
||||
|
||||
/*
|
||||
* check even for read faults because we might have lost our CoWed
|
||||
* page
|
||||
*/
|
||||
if (!(vmf->vma->vm_flags & VM_SHARED))
|
||||
ret = check_stable_address_space(vmf->vma->vm_mm);
|
||||
if (!ret)
|
||||
ret = alloc_set_pte(vmf, vmf->memcg, page);
|
||||
if (vmf->pte)
|
||||
pte_unmap_unlock(vmf->pte, vmf->ptl);
|
||||
return ret;
|
||||
|
@ -3900,19 +3917,6 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
|
|||
mem_cgroup_oom_synchronize(false);
|
||||
}
|
||||
|
||||
/*
|
||||
* This mm has been already reaped by the oom reaper and so the
|
||||
* refault cannot be trusted in general. Anonymous refaults would
|
||||
* lose data and give a zero page instead e.g. This is especially
|
||||
* problem for use_mm() because regular tasks will just die and
|
||||
* the corrupted data will not be visible anywhere while kthread
|
||||
* will outlive the oom victim and potentially propagate the data
|
||||
* further.
|
||||
*/
|
||||
if (unlikely((current->flags & PF_KTHREAD) && !(ret & VM_FAULT_ERROR)
|
||||
&& test_bit(MMF_UNSTABLE, &vma->vm_mm->flags)))
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(handle_mm_fault);
|
||||
|
|
|
@ -861,11 +861,6 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
|
|||
*policy |= (pol->flags & MPOL_MODE_FLAGS);
|
||||
}
|
||||
|
||||
if (vma) {
|
||||
up_read(&current->mm->mmap_sem);
|
||||
vma = NULL;
|
||||
}
|
||||
|
||||
err = 0;
|
||||
if (nmask) {
|
||||
if (mpol_store_user_nodemask(pol)) {
|
||||
|
|
|
@ -146,22 +146,6 @@ static unsigned long __init free_low_memory_core_early(void)
|
|||
NULL)
|
||||
count += __free_memory_core(start, end);
|
||||
|
||||
#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
|
||||
{
|
||||
phys_addr_t size;
|
||||
|
||||
/* Free memblock.reserved array if it was allocated */
|
||||
size = get_allocated_memblock_reserved_regions_info(&start);
|
||||
if (size)
|
||||
count += __free_memory_core(start, start + size);
|
||||
|
||||
/* Free memblock.memory array if it was allocated */
|
||||
size = get_allocated_memblock_memory_regions_info(&start);
|
||||
if (size)
|
||||
count += __free_memory_core(start, start + size);
|
||||
}
|
||||
#endif
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
|
|
@ -2724,9 +2724,12 @@ EXPORT_SYMBOL(clear_page_dirty_for_io);
|
|||
int test_clear_page_writeback(struct page *page)
|
||||
{
|
||||
struct address_space *mapping = page_mapping(page);
|
||||
struct mem_cgroup *memcg;
|
||||
struct lruvec *lruvec;
|
||||
int ret;
|
||||
|
||||
lock_page_memcg(page);
|
||||
memcg = lock_page_memcg(page);
|
||||
lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
|
||||
if (mapping && mapping_use_writeback_tags(mapping)) {
|
||||
struct inode *inode = mapping->host;
|
||||
struct backing_dev_info *bdi = inode_to_bdi(inode);
|
||||
|
@ -2754,12 +2757,18 @@ int test_clear_page_writeback(struct page *page)
|
|||
} else {
|
||||
ret = TestClearPageWriteback(page);
|
||||
}
|
||||
/*
|
||||
* NOTE: Page might be free now! Writeback doesn't hold a page
|
||||
* reference on its own, it relies on truncation to wait for
|
||||
* the clearing of PG_writeback. The below can only access
|
||||
* page state that is static across allocation cycles.
|
||||
*/
|
||||
if (ret) {
|
||||
dec_lruvec_page_state(page, NR_WRITEBACK);
|
||||
dec_lruvec_state(lruvec, NR_WRITEBACK);
|
||||
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
|
||||
inc_node_page_state(page, NR_WRITTEN);
|
||||
}
|
||||
unlock_page_memcg(page);
|
||||
__unlock_page_memcg(memcg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -1584,6 +1584,10 @@ void __init page_alloc_init_late(void)
|
|||
/* Reinit limits that are based on free pages after the kernel is up */
|
||||
files_maxfiles_init();
|
||||
#endif
|
||||
#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
|
||||
/* Discard memblock private memory */
|
||||
memblock_discard();
|
||||
#endif
|
||||
|
||||
for_each_populated_zone(zone)
|
||||
set_zone_contiguous(zone);
|
||||
|
|
|
@ -5642,13 +5642,14 @@ static void sysfs_slab_remove_workfn(struct work_struct *work)
|
|||
* A cache is never shut down before deactivation is
|
||||
* complete, so no need to worry about synchronization.
|
||||
*/
|
||||
return;
|
||||
goto out;
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
kset_unregister(s->memcg_kset);
|
||||
#endif
|
||||
kobject_uevent(&s->kobj, KOBJ_REMOVE);
|
||||
kobject_del(&s->kobj);
|
||||
out:
|
||||
kobject_put(&s->kobj);
|
||||
}
|
||||
|
||||
|
|
13
mm/vmalloc.c
13
mm/vmalloc.c
|
@ -1671,7 +1671,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
|
|||
struct page **pages;
|
||||
unsigned int nr_pages, array_size, i;
|
||||
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
|
||||
const gfp_t alloc_mask = gfp_mask | __GFP_HIGHMEM | __GFP_NOWARN;
|
||||
const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
|
||||
const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ?
|
||||
0 :
|
||||
__GFP_HIGHMEM;
|
||||
|
||||
nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
|
||||
array_size = (nr_pages * sizeof(struct page *));
|
||||
|
@ -1679,7 +1682,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
|
|||
area->nr_pages = nr_pages;
|
||||
/* Please note that the recursion is strictly bounded. */
|
||||
if (array_size > PAGE_SIZE) {
|
||||
pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
|
||||
pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask,
|
||||
PAGE_KERNEL, node, area->caller);
|
||||
} else {
|
||||
pages = kmalloc_node(array_size, nested_gfp, node);
|
||||
|
@ -1700,9 +1703,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
|
|||
}
|
||||
|
||||
if (node == NUMA_NO_NODE)
|
||||
page = alloc_page(alloc_mask);
|
||||
page = alloc_page(alloc_mask|highmem_mask);
|
||||
else
|
||||
page = alloc_pages_node(node, alloc_mask, 0);
|
||||
page = alloc_pages_node(node, alloc_mask|highmem_mask, 0);
|
||||
|
||||
if (unlikely(!page)) {
|
||||
/* Successfully allocated i pages, free them in __vunmap() */
|
||||
|
@ -1710,7 +1713,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
|
|||
goto fail;
|
||||
}
|
||||
area->pages[i] = page;
|
||||
if (gfpflags_allow_blocking(gfp_mask))
|
||||
if (gfpflags_allow_blocking(gfp_mask|highmem_mask))
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
|
|
|
@ -473,8 +473,8 @@ usage()
|
|||
echo " all Runs all tests (default)"
|
||||
echo " -t Run test ID the number amount of times is recommended"
|
||||
echo " -w Watch test ID run until it runs into an error"
|
||||
echo " -c Run test ID once"
|
||||
echo " -s Run test ID x test-count number of times"
|
||||
echo " -s Run test ID once"
|
||||
echo " -c Run test ID x test-count number of times"
|
||||
echo " -l List all test ID list"
|
||||
echo " -h|--help Help"
|
||||
echo
|
||||
|
|
Загрузка…
Ссылка в новой задаче