Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
 "13 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm, thp: do not cause memcg oom for thp
  mm/vmscan: wake up flushers for legacy cgroups too
  Revert "mm: page_alloc: skip over regions of invalid pfns where possible"
  mm/shmem: do not wait for lock_page() in shmem_unused_huge_shrink()
  mm/thp: do not wait for lock_page() in deferred_split_scan()
  mm/khugepaged.c: convert VM_BUG_ON() to collapse fail
  x86/mm: implement free pmd/pte page interfaces
  mm/vmalloc: add interfaces to free unmapped page table
  h8300: remove extraneous __BIG_ENDIAN definition
  hugetlbfs: check for pgoff value overflow
  lockdep: fix fs_reclaim warning
  MAINTAINERS: update Mark Fasheh's e-mail
  mm/mempolicy.c: avoid use uninitialized preferred_node
Commit f36b7534b8
@@ -10334,7 +10334,7 @@ F:	drivers/oprofile/
 F:	include/linux/oprofile.h
 
 ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
-M:	Mark Fasheh <mfasheh@versity.com>
+M:	Mark Fasheh <mark@fasheh.com>
 M:	Joel Becker <jlbec@evilplan.org>
 L:	ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
 W:	http://ocfs2.wiki.kernel.org
@@ -972,3 +972,13 @@ int pmd_clear_huge(pmd_t *pmdp)
 	pmd_clear(pmdp);
 	return 1;
 }
+
+int pud_free_pmd_page(pud_t *pud)
+{
+	return pud_none(*pud);
+}
+
+int pmd_free_pte_page(pmd_t *pmd)
+{
+	return pmd_none(*pmd);
+}
@@ -2,7 +2,6 @@
 #ifndef __H8300_BYTEORDER_H__
 #define __H8300_BYTEORDER_H__
 
-#define __BIG_ENDIAN __ORDER_BIG_ENDIAN__
 #include <linux/byteorder/big_endian.h>
 
 #endif
@@ -702,4 +702,52 @@ int pmd_clear_huge(pmd_t *pmd)
 
 	return 0;
 }
+
+/**
+ * pud_free_pmd_page - Clear pud entry and free pmd page.
+ * @pud: Pointer to a PUD.
+ *
+ * Context: The pud range has been unmapped and TLB purged.
+ * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ */
+int pud_free_pmd_page(pud_t *pud)
+{
+	pmd_t *pmd;
+	int i;
+
+	if (pud_none(*pud))
+		return 1;
+
+	pmd = (pmd_t *)pud_page_vaddr(*pud);
+
+	for (i = 0; i < PTRS_PER_PMD; i++)
+		if (!pmd_free_pte_page(&pmd[i]))
+			return 0;
+
+	pud_clear(pud);
+	free_page((unsigned long)pmd);
+
+	return 1;
+}
+
+/**
+ * pmd_free_pte_page - Clear pmd entry and free pte page.
+ * @pmd: Pointer to a PMD.
+ *
+ * Context: The pmd range has been unmapped and TLB purged.
+ * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ */
+int pmd_free_pte_page(pmd_t *pmd)
+{
+	pte_t *pte;
+
+	if (pmd_none(*pmd))
+		return 1;
+
+	pte = (pte_t *)pmd_page_vaddr(*pmd);
+	pmd_clear(pmd);
+	free_page((unsigned long)pte);
+
+	return 1;
+}
 #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
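The kernel-doc above spells out the contract: the caller has already unmapped the range and flushed the TLB, and a non-zero return means the entry no longer points at a lower-level table, so a huge mapping may safely be installed in its place. As a rough standalone analogy only (plain userspace C, not kernel code; the names free_child_table() and set_leaf() are invented for this sketch), the pattern is "free the orphaned child table before reusing the slot as a leaf":

/*
 * Standalone userspace model, not kernel code: a slot either points at a
 * child table or holds a direct "leaf" mapping.  Installing a leaf over a
 * slot that still points at a child table would leak that table; freeing
 * it first mirrors what pmd_free_pte_page()/pud_free_pmd_page() now allow
 * the huge-mapping paths to do.
 */
#include <stdio.h>
#include <stdlib.h>

#define PTRS_PER_TABLE 4

struct slot {
	int has_table;		/* slot currently points at a child table */
	void *ptr;		/* child table, or the leaf target */
};

/* Rough analogue of pmd_free_pte_page(): release the child table, if any. */
static int free_child_table(struct slot *s)
{
	if (s->has_table) {
		free(s->ptr);
		s->has_table = 0;
		s->ptr = NULL;
	}
	return 1;		/* cleared; the slot may now take a leaf */
}

/* Rough analogue of the guarded pmd_set_huge() call in the ioremap path. */
static void set_leaf(struct slot *s, void *target)
{
	if (free_child_table(s))
		s->ptr = target;
}

int main(void)
{
	struct slot s = { 1, calloc(PTRS_PER_TABLE, sizeof(struct slot)) };
	static char region[] = "large mapping";

	set_leaf(&s, region);
	printf("slot now maps: %s\n", (char *)s.ptr);
	return 0;
}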
@@ -108,6 +108,16 @@ static void huge_pagevec_release(struct pagevec *pvec)
 	pagevec_reinit(pvec);
 }
 
+/*
+ * Mask used when checking the page offset value passed in via system
+ * calls. This value will be converted to a loff_t which is signed.
+ * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
+ * value. The extra bit (- 1 in the shift value) is to take the sign
+ * bit into account.
+ */
+#define PGOFF_LOFFT_MAX \
+	(((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
+
 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode = file_inode(file);
@@ -127,12 +137,13 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_ops = &hugetlb_vm_ops;
 
 	/*
-	 * Offset passed to mmap (before page shift) could have been
-	 * negative when represented as a (l)off_t.
+	 * page based offset in vm_pgoff could be sufficiently large to
+	 * overflow a (l)off_t when converted to byte offset.
 	 */
-	if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
+	if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
 		return -EINVAL;
 
 	/* must be huge page aligned */
 	if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
 		return -EINVAL;
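As a side note, the arithmetic behind PGOFF_LOFFT_MAX can be checked in isolation: once vm_pgoff is shifted left by PAGE_SHIFT and stored in a signed loff_t, any of the top PAGE_SHIFT + 1 bits of vm_pgoff would overflow the value or set its sign bit. A minimal userspace sketch of that check, assuming a 64-bit unsigned long and PAGE_SHIFT == 12 (both assumptions of the sketch, not part of the patch):

/* Standalone demo (not kernel code): which vm_pgoff values the mask rejects,
 * assuming 64-bit unsigned long and a 4 KiB page size (PAGE_SHIFT == 12). */
#include <stdio.h>

#define PAGE_SHIFT	12
#define BITS_PER_LONG	64
#define PGOFF_LOFFT_MAX \
	(((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))

int main(void)
{
	/* Largest page offset whose byte offset still fits a positive loff_t. */
	unsigned long ok_pgoff  = (1UL << (BITS_PER_LONG - PAGE_SHIFT - 1)) - 1;
	/* One past that: shifting by PAGE_SHIFT would reach the sign bit. */
	unsigned long bad_pgoff = ok_pgoff + 1;

	printf("mask          = %#lx\n", PGOFF_LOFFT_MAX);
	printf("ok_pgoff  hit = %d (byte offset %lld)\n",
	       (ok_pgoff & PGOFF_LOFFT_MAX) != 0,
	       (long long)(ok_pgoff << PAGE_SHIFT));
	/* On a typical two's-complement system the cast below shows the
	 * sign bit being hit, i.e. a negative byte offset. */
	printf("bad_pgoff hit = %d (byte offset %lld)\n",
	       (bad_pgoff & PGOFF_LOFFT_MAX) != 0,
	       (long long)(bad_pgoff << PAGE_SHIFT));
	return 0;
}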
@@ -983,6 +983,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
 int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
 int pud_clear_huge(pud_t *pud);
 int pmd_clear_huge(pmd_t *pmd);
+int pud_free_pmd_page(pud_t *pud);
+int pmd_free_pte_page(pmd_t *pmd);
 #else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
 static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
 {
@@ -1008,6 +1010,14 @@ static inline int pmd_clear_huge(pmd_t *pmd)
 {
 	return 0;
 }
+static inline int pud_free_pmd_page(pud_t *pud)
+{
+	return 0;
+}
+static inline int pmd_free_pte_page(pmd_t *pmd)
+{
+	return 0;
+}
 #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
 
 #ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
@@ -187,7 +187,6 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
 			    unsigned long *end_pfn);
 void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 			  unsigned long *out_end_pfn, int *out_nid);
-unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn);
 
 /**
  * for_each_mem_pfn_range - early memory pfn range iterator
@@ -91,7 +91,8 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 
 		if (ioremap_pmd_enabled() &&
 		    ((next - addr) == PMD_SIZE) &&
-		    IS_ALIGNED(phys_addr + addr, PMD_SIZE)) {
+		    IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
+		    pmd_free_pte_page(pmd)) {
 			if (pmd_set_huge(pmd, phys_addr + addr, prot))
 				continue;
 		}
@@ -117,7 +118,8 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
 
 		if (ioremap_pud_enabled() &&
 		    ((next - addr) == PUD_SIZE) &&
-		    IS_ALIGNED(phys_addr + addr, PUD_SIZE)) {
+		    IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
+		    pud_free_pmd_page(pud)) {
 			if (pud_set_huge(pud, phys_addr + addr, prot))
 				continue;
 		}
@@ -555,7 +555,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
+				  true)) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1316,7 +1317,7 @@ alloc:
 	}
 
 	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
-					huge_gfp, &memcg, true))) {
+				huge_gfp | __GFP_NORETRY, &memcg, true))) {
 		put_page(new_page);
 		split_huge_pmd(vma, vmf->pmd, vmf->address);
 		if (page)
@@ -2783,11 +2784,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
 
 	list_for_each_safe(pos, next, &list) {
 		page = list_entry((void *)pos, struct page, mapping);
-		lock_page(page);
+		if (!trylock_page(page))
+			goto next;
 		/* split_huge_page() removes page from list on success */
 		if (!split_huge_page(page))
 			split++;
 		unlock_page(page);
+next:
 		put_page(page);
 	}
 
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
+#include <linux/mmdebug.h>
 #include <linux/sched/signal.h>
 #include <linux/rmap.h>
 #include <linux/string_helpers.h>
@@ -4374,6 +4375,12 @@ int hugetlb_reserve_pages(struct inode *inode,
 	struct resv_map *resv_map;
 	long gbl_reserve;
 
+	/* This should never happen */
+	if (from > to) {
+		VM_WARN(1, "%s called with a negative range\n", __func__);
+		return -EINVAL;
+	}
+
 	/*
 	 * Only apply hugepage reservation if asked. At fault time, an
 	 * attempt will be made for VM_NORESERVE to allocate a page
@@ -530,7 +530,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 			goto out;
 		}
 
-		VM_BUG_ON_PAGE(PageCompound(page), page);
+		/* TODO: teach khugepaged to collapse THP mapped with pte */
+		if (PageCompound(page)) {
+			result = SCAN_PAGE_COMPOUND;
+			goto out;
+		}
+
 		VM_BUG_ON_PAGE(!PageAnon(page), page);
 
 		/*
@@ -960,7 +965,9 @@ static void collapse_huge_page(struct mm_struct *mm,
 		goto out_nolock;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+	/* Do not oom kill for khugepaged charges */
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+					   &memcg, true))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out_nolock;
 	}
@@ -1319,7 +1326,9 @@ static void collapse_shmem(struct mm_struct *mm,
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+	/* Do not oom kill for khugepaged charges */
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+					   &memcg, true))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out;
 	}
@@ -1101,34 +1101,6 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
 		*out_nid = r->nid;
 }
 
-unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
-						      unsigned long max_pfn)
-{
-	struct memblock_type *type = &memblock.memory;
-	unsigned int right = type->cnt;
-	unsigned int mid, left = 0;
-	phys_addr_t addr = PFN_PHYS(++pfn);
-
-	do {
-		mid = (right + left) / 2;
-
-		if (addr < type->regions[mid].base)
-			right = mid;
-		else if (addr >= (type->regions[mid].base +
-				  type->regions[mid].size))
-			left = mid + 1;
-		else {
-			/* addr is within the region, so pfn is valid */
-			return pfn;
-		}
-	} while (left < right);
-
-	if (right == type->cnt)
-		return -1UL;
-	else
-		return PHYS_PFN(type->regions[right].base);
-}
-
 /**
  * memblock_set_node - set node ID on memblock regions
  * @base: base of area to set node ID for
@@ -2124,6 +2124,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 	case MPOL_INTERLEAVE:
 		return !!nodes_equal(a->v.nodes, b->v.nodes);
 	case MPOL_PREFERRED:
+		/* a's ->flags is the same as b's */
+		if (a->flags & MPOL_F_LOCAL)
+			return true;
 		return a->v.preferred_node == b->v.preferred_node;
 	default:
 		BUG();
@@ -3596,7 +3596,7 @@ static bool __need_fs_reclaim(gfp_t gfp_mask)
 		return false;
 
 	/* this guy won't enter reclaim */
-	if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
+	if (current->flags & PF_MEMALLOC)
 		return false;
 
 	/* We're only interested __GFP_FS allocations for now */
@@ -5356,17 +5356,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		if (context != MEMMAP_EARLY)
 			goto not_early;
 
-		if (!early_pfn_valid(pfn)) {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-			/*
-			 * Skip to the pfn preceding the next valid one (or
-			 * end_pfn), such that we hit a valid pfn (or end_pfn)
-			 * on our next iteration of the loop.
-			 */
-			pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
-#endif
+		if (!early_pfn_valid(pfn))
 			continue;
-		}
 		if (!early_pfn_in_nid(pfn, nid))
 			continue;
 		if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
mm/shmem.c
@@ -493,36 +493,45 @@ next:
 		info = list_entry(pos, struct shmem_inode_info, shrinklist);
 		inode = &info->vfs_inode;
 
-		if (nr_to_split && split >= nr_to_split) {
-			iput(inode);
-			continue;
-		}
+		if (nr_to_split && split >= nr_to_split)
+			goto leave;
 
-		page = find_lock_page(inode->i_mapping,
+		page = find_get_page(inode->i_mapping,
 				(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
 		if (!page)
 			goto drop;
 
+		/* No huge page at the end of the file: nothing to split */
 		if (!PageTransHuge(page)) {
-			unlock_page(page);
 			put_page(page);
 			goto drop;
 		}
 
+		/*
+		 * Leave the inode on the list if we failed to lock
+		 * the page at this time.
+		 *
+		 * Waiting for the lock may lead to deadlock in the
+		 * reclaim path.
+		 */
+		if (!trylock_page(page)) {
+			put_page(page);
+			goto leave;
+		}
+
 		ret = split_huge_page(page);
 		unlock_page(page);
 		put_page(page);
 
-		if (ret) {
-			/* split failed: leave it on the list */
-			iput(inode);
-			continue;
-		}
+		/* If split failed leave the inode on the list */
+		if (ret)
+			goto leave;
 
 		split++;
 drop:
 		list_del_init(&info->shrinklist);
 		removed++;
+leave:
 		iput(inode);
 	}
 
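This hunk and the deferred_split_scan() hunk above apply the same fix: replace a blocking lock_page() with trylock_page() and skip busy pages, because sleeping on a page lock from the reclaim path can deadlock. A rough standalone analogy of the skip-if-busy pattern using POSIX mutexes (userspace only; the item/shrink_one() names are invented for the sketch, and pthread_mutex_trylock() stands in for trylock_page()):

/* Standalone analogy, not kernel code: process an item only if its lock can
 * be taken without sleeping; otherwise leave it for a later pass. */
#include <pthread.h>
#include <stdio.h>

struct item {
	pthread_mutex_t lock;
	int value;
};

/* Returns 1 if the item was processed, 0 if it was busy and skipped. */
static int shrink_one(struct item *it)
{
	if (pthread_mutex_trylock(&it->lock) != 0)
		return 0;		/* busy: leave it on the list */
	it->value = 0;			/* "split" the item */
	pthread_mutex_unlock(&it->lock);
	return 1;
}

int main(void)
{
	struct item busy = { PTHREAD_MUTEX_INITIALIZER, 1 };
	struct item idle = { PTHREAD_MUTEX_INITIALIZER, 1 };

	pthread_mutex_lock(&busy.lock);	/* someone else holds this one */
	printf("busy processed: %d\n", shrink_one(&busy));	/* 0 */
	printf("idle processed: %d\n", shrink_one(&idle));	/* 1 */
	pthread_mutex_unlock(&busy.lock);
	return 0;
}

Built with -pthread, the locked item is simply left for a later pass while the free one is processed immediately, which is the behaviour the two patches aim for.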
mm/vmscan.c
@@ -1779,6 +1779,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	if (stat.nr_writeback && stat.nr_writeback == nr_taken)
 		set_bit(PGDAT_WRITEBACK, &pgdat->flags);
 
+	/*
+	 * If dirty pages are scanned that are not queued for IO, it
+	 * implies that flushers are not doing their job. This can
+	 * happen when memory pressure pushes dirty pages to the end of
+	 * the LRU before the dirty limits are breached and the dirty
+	 * data has expired. It can also happen when the proportion of
+	 * dirty pages grows not through writes but through memory
+	 * pressure reclaiming all the clean cache. And in some cases,
+	 * the flushers simply cannot keep up with the allocation
+	 * rate. Nudge the flusher threads in case they are asleep.
+	 */
+	if (stat.nr_unqueued_dirty == nr_taken)
+		wakeup_flusher_threads(WB_REASON_VMSCAN);
+
 	/*
 	 * Legacy memcg will stall in page writeback so avoid forcibly
 	 * stalling here.
@@ -1791,22 +1805,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
 		set_bit(PGDAT_CONGESTED, &pgdat->flags);
 
-	/*
-	 * If dirty pages are scanned that are not queued for IO, it
-	 * implies that flushers are not doing their job. This can
-	 * happen when memory pressure pushes dirty pages to the end of
-	 * the LRU before the dirty limits are breached and the dirty
-	 * data has expired. It can also happen when the proportion of
-	 * dirty pages grows not through writes but through memory
-	 * pressure reclaiming all the clean cache. And in some cases,
-	 * the flushers simply cannot keep up with the allocation
-	 * rate. Nudge the flusher threads in case they are asleep, but
-	 * also allow kswapd to start writing pages during reclaim.
-	 */
-	if (stat.nr_unqueued_dirty == nr_taken) {
-		wakeup_flusher_threads(WB_REASON_VMSCAN);
+	/* Allow kswapd to start writing pages during reclaim. */
+	if (stat.nr_unqueued_dirty == nr_taken)
 		set_bit(PGDAT_DIRTY, &pgdat->flags);
-	}
 
 	/*
 	 * If kswapd scans pages marked marked for immediate