Merge branch 'akpm' (patches from Andrew)
Merge yet more updates from Andrew Morton: "Subsystems affected by this patch series: mm (memcg, migration, pagemap, gup, madvise, vmalloc), ia64, and misc" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (31 commits) mm: remove duplicate include statement in mmu.c mm: remove the filename in the top of file comment in vmalloc.c mm: cleanup the gfp_mask handling in __vmalloc_area_node mm: remove alloc_vm_area x86/xen: open code alloc_vm_area in arch_gnttab_valloc xen/xenbus: use apply_to_page_range directly in xenbus_map_ring_pv drm/i915: use vmap in i915_gem_object_map drm/i915: stop using kmap in i915_gem_object_map drm/i915: use vmap in shmem_pin_map zsmalloc: switch from alloc_vm_area to get_vm_area mm: allow a NULL fn callback in apply_to_page_range mm: add a vmap_pfn function mm: add a VM_MAP_PUT_PAGES flag for vmap mm: update the documentation for vfree mm/madvise: introduce process_madvise() syscall: an external memory hinting API pid: move pidfd_get_pid() to pid.c mm/madvise: pass mm to do_madvise selftests/vm: 10x speedup for hmm-tests binfmt_elf: take the mmap lock around find_extend_vma() mm/gup_benchmark: take the mmap lock around GUP ...
This commit is contained in:
Коммит
1912b04e0f
|
@ -479,3 +479,4 @@
|
|||
547 common openat2 sys_openat2
|
||||
548 common pidfd_getfd sys_pidfd_getfd
|
||||
549 common faccessat2 sys_faccessat2
|
||||
550 common process_madvise sys_process_madvise
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
|
||||
#include <asm/cp15.h>
|
||||
#include <asm/cputype.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/cachetype.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/sections.h>
|
||||
|
|
|
@ -453,3 +453,4 @@
|
|||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
440 common process_madvise sys_process_madvise
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
|
||||
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
|
||||
|
||||
#define __NR_compat_syscalls 440
|
||||
#define __NR_compat_syscalls 441
|
||||
#endif
|
||||
|
||||
#define __ARCH_WANT_SYS_CLONE
|
||||
|
|
|
@ -887,6 +887,8 @@ __SYSCALL(__NR_openat2, sys_openat2)
|
|||
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
|
||||
#define __NR_faccessat2 439
|
||||
__SYSCALL(__NR_faccessat2, sys_faccessat2)
|
||||
#define __NR_process_madvise 440
|
||||
__SYSCALL(__NR_process_madvise, sys_process_madvise)
|
||||
|
||||
/*
|
||||
* Please add new compat syscalls above this comment and update
|
||||
|
|
|
@ -40,7 +40,7 @@ obj-y += esi_stub.o # must be in kernel proper
|
|||
endif
|
||||
obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o
|
||||
|
||||
obj-$(CONFIG_BINFMT_ELF) += elfcore.o
|
||||
obj-$(CONFIG_ELF_CORE) += elfcore.o
|
||||
|
||||
# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
|
||||
CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31
|
||||
|
|
|
@ -360,3 +360,4 @@
|
|||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
440 common process_madvise sys_process_madvise
|
||||
|
|
|
@ -439,3 +439,4 @@
|
|||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
440 common process_madvise sys_process_madvise
|
||||
|
|
|
@ -445,3 +445,4 @@
|
|||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
440 common process_madvise sys_process_madvise
|
||||
|
|
|
@ -378,3 +378,4 @@
|
|||
437 n32 openat2 sys_openat2
|
||||
438 n32 pidfd_getfd sys_pidfd_getfd
|
||||
439 n32 faccessat2 sys_faccessat2
|
||||
440 n32 process_madvise sys_process_madvise
|
||||
|
|
|
@ -354,3 +354,4 @@
|
|||
437 n64 openat2 sys_openat2
|
||||
438 n64 pidfd_getfd sys_pidfd_getfd
|
||||
439 n64 faccessat2 sys_faccessat2
|
||||
440 n64 process_madvise sys_process_madvise
|
||||
|
|
|
@ -427,3 +427,4 @@
|
|||
437 o32 openat2 sys_openat2
|
||||
438 o32 pidfd_getfd sys_pidfd_getfd
|
||||
439 o32 faccessat2 sys_faccessat2
|
||||
440 o32 process_madvise sys_process_madvise
|
||||
|
|
|
@ -437,3 +437,4 @@
|
|||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
440 common process_madvise sys_process_madvise
|
||||
|
|
|
@ -529,3 +529,4 @@
|
|||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
440 common process_madvise sys_process_madvise
|
||||
|
|
|
@ -442,3 +442,4 @@
|
|||
437 common openat2 sys_openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2 sys_faccessat2
|
||||
440 common process_madvise sys_process_madvise sys_process_madvise
|
||||
|
|
|
@ -442,3 +442,4 @@
|
|||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
440 common process_madvise sys_process_madvise
|
||||
|
|
|
@ -485,3 +485,4 @@
|
|||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
440 common process_madvise sys_process_madvise
|
||||
|
|
|
@ -444,3 +444,4 @@
|
|||
437 i386 openat2 sys_openat2
|
||||
438 i386 pidfd_getfd sys_pidfd_getfd
|
||||
439 i386 faccessat2 sys_faccessat2
|
||||
440 i386 process_madvise sys_process_madvise
|
||||
|
|
|
@ -361,6 +361,7 @@
|
|||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
440 common process_madvise sys_process_madvise
|
||||
|
||||
#
|
||||
# x32-specific system call numbers start at 512 to avoid cache impact
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
static struct gnttab_vm_area {
|
||||
struct vm_struct *area;
|
||||
pte_t **ptes;
|
||||
int idx;
|
||||
} gnttab_shared_vm_area, gnttab_status_vm_area;
|
||||
|
||||
int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
|
||||
|
@ -90,19 +91,31 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
|
|||
}
|
||||
}
|
||||
|
||||
static int gnttab_apply(pte_t *pte, unsigned long addr, void *data)
|
||||
{
|
||||
struct gnttab_vm_area *area = data;
|
||||
|
||||
area->ptes[area->idx++] = pte;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames)
|
||||
{
|
||||
area->ptes = kmalloc_array(nr_frames, sizeof(*area->ptes), GFP_KERNEL);
|
||||
if (area->ptes == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
area->area = alloc_vm_area(PAGE_SIZE * nr_frames, area->ptes);
|
||||
if (area->area == NULL) {
|
||||
kfree(area->ptes);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
area->area = get_vm_area(PAGE_SIZE * nr_frames, VM_IOREMAP);
|
||||
if (!area->area)
|
||||
goto out_free_ptes;
|
||||
if (apply_to_page_range(&init_mm, (unsigned long)area->area->addr,
|
||||
PAGE_SIZE * nr_frames, gnttab_apply, area))
|
||||
goto out_free_vm_area;
|
||||
return 0;
|
||||
out_free_vm_area:
|
||||
free_vm_area(area->area);
|
||||
out_free_ptes:
|
||||
kfree(area->ptes);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static void arch_gnttab_vfree(struct gnttab_vm_area *area)
|
||||
|
|
|
@ -410,3 +410,4 @@
|
|||
437 common openat2 sys_openat2
|
||||
438 common pidfd_getfd sys_pidfd_getfd
|
||||
439 common faccessat2 sys_faccessat2
|
||||
440 common process_madvise sys_process_madvise
|
||||
|
|
|
@ -25,6 +25,7 @@ config DRM_I915
|
|||
select CRC32
|
||||
select SND_HDA_I915 if SND_HDA_CORE
|
||||
select CEC_CORE if CEC_NOTIFIER
|
||||
select VMAP_PFN
|
||||
help
|
||||
Choose this option if you have a system that has "Intel Graphics
|
||||
Media Accelerator" or "HD Graphics" integrated graphics,
|
||||
|
|
|
@ -162,8 +162,6 @@ static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
|
|||
{
|
||||
if (is_vmalloc_addr(ptr))
|
||||
vunmap(ptr);
|
||||
else
|
||||
kunmap(kmap_to_page(ptr));
|
||||
}
|
||||
|
||||
struct sg_table *
|
||||
|
@ -234,34 +232,21 @@ unlock:
|
|||
return err;
|
||||
}
|
||||
|
||||
static inline pte_t iomap_pte(resource_size_t base,
|
||||
dma_addr_t offset,
|
||||
pgprot_t prot)
|
||||
{
|
||||
return pte_mkspecial(pfn_pte((base + offset) >> PAGE_SHIFT, prot));
|
||||
}
|
||||
|
||||
/* The 'mapping' part of i915_gem_object_pin_map() below */
|
||||
static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
|
||||
enum i915_map_type type)
|
||||
static void *i915_gem_object_map_page(struct drm_i915_gem_object *obj,
|
||||
enum i915_map_type type)
|
||||
{
|
||||
unsigned long n_pte = obj->base.size >> PAGE_SHIFT;
|
||||
struct sg_table *sgt = obj->mm.pages;
|
||||
pte_t *stack[32], **mem;
|
||||
struct vm_struct *area;
|
||||
unsigned long n_pages = obj->base.size >> PAGE_SHIFT, i;
|
||||
struct page *stack[32], **pages = stack, *page;
|
||||
struct sgt_iter iter;
|
||||
pgprot_t pgprot;
|
||||
void *vaddr;
|
||||
|
||||
if (!i915_gem_object_has_struct_page(obj) && type != I915_MAP_WC)
|
||||
return NULL;
|
||||
|
||||
if (GEM_WARN_ON(type == I915_MAP_WC &&
|
||||
!static_cpu_has(X86_FEATURE_PAT)))
|
||||
return NULL;
|
||||
|
||||
/* A single page can always be kmapped */
|
||||
if (n_pte == 1 && type == I915_MAP_WB) {
|
||||
struct page *page = sg_page(sgt->sgl);
|
||||
|
||||
switch (type) {
|
||||
default:
|
||||
MISSING_CASE(type);
|
||||
fallthrough; /* to use PAGE_KERNEL anyway */
|
||||
case I915_MAP_WB:
|
||||
/*
|
||||
* On 32b, highmem using a finite set of indirect PTE (i.e.
|
||||
* vmap) to provide virtual mappings of the high pages.
|
||||
|
@ -277,33 +262,10 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
|
|||
* forever.
|
||||
*
|
||||
* So if the page is beyond the 32b boundary, make an explicit
|
||||
* vmap. On 64b, this check will be optimised away as we can
|
||||
* directly kmap any page on the system.
|
||||
* vmap.
|
||||
*/
|
||||
if (!PageHighMem(page))
|
||||
return kmap(page);
|
||||
}
|
||||
|
||||
mem = stack;
|
||||
if (n_pte > ARRAY_SIZE(stack)) {
|
||||
/* Too big for stack -- allocate temporary array instead */
|
||||
mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
|
||||
if (!mem)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
area = alloc_vm_area(obj->base.size, mem);
|
||||
if (!area) {
|
||||
if (mem != stack)
|
||||
kvfree(mem);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
default:
|
||||
MISSING_CASE(type);
|
||||
fallthrough; /* to use PAGE_KERNEL anyway */
|
||||
case I915_MAP_WB:
|
||||
if (n_pages == 1 && !PageHighMem(sg_page(obj->mm.pages->sgl)))
|
||||
return page_address(sg_page(obj->mm.pages->sgl));
|
||||
pgprot = PAGE_KERNEL;
|
||||
break;
|
||||
case I915_MAP_WC:
|
||||
|
@ -311,30 +273,50 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
|
|||
break;
|
||||
}
|
||||
|
||||
if (i915_gem_object_has_struct_page(obj)) {
|
||||
struct sgt_iter iter;
|
||||
struct page *page;
|
||||
pte_t **ptes = mem;
|
||||
|
||||
for_each_sgt_page(page, iter, sgt)
|
||||
**ptes++ = mk_pte(page, pgprot);
|
||||
} else {
|
||||
resource_size_t iomap;
|
||||
struct sgt_iter iter;
|
||||
pte_t **ptes = mem;
|
||||
dma_addr_t addr;
|
||||
|
||||
iomap = obj->mm.region->iomap.base;
|
||||
iomap -= obj->mm.region->region.start;
|
||||
|
||||
for_each_sgt_daddr(addr, iter, sgt)
|
||||
**ptes++ = iomap_pte(iomap, addr, pgprot);
|
||||
if (n_pages > ARRAY_SIZE(stack)) {
|
||||
/* Too big for stack -- allocate temporary array instead */
|
||||
pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
|
||||
if (!pages)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (mem != stack)
|
||||
kvfree(mem);
|
||||
i = 0;
|
||||
for_each_sgt_page(page, iter, obj->mm.pages)
|
||||
pages[i++] = page;
|
||||
vaddr = vmap(pages, n_pages, 0, pgprot);
|
||||
if (pages != stack)
|
||||
kvfree(pages);
|
||||
return vaddr;
|
||||
}
|
||||
|
||||
return area->addr;
|
||||
static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
|
||||
enum i915_map_type type)
|
||||
{
|
||||
resource_size_t iomap = obj->mm.region->iomap.base -
|
||||
obj->mm.region->region.start;
|
||||
unsigned long n_pfn = obj->base.size >> PAGE_SHIFT;
|
||||
unsigned long stack[32], *pfns = stack, i;
|
||||
struct sgt_iter iter;
|
||||
dma_addr_t addr;
|
||||
void *vaddr;
|
||||
|
||||
if (type != I915_MAP_WC)
|
||||
return NULL;
|
||||
|
||||
if (n_pfn > ARRAY_SIZE(stack)) {
|
||||
/* Too big for stack -- allocate temporary array instead */
|
||||
pfns = kvmalloc_array(n_pfn, sizeof(*pfns), GFP_KERNEL);
|
||||
if (!pfns)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
for_each_sgt_daddr(addr, iter, obj->mm.pages)
|
||||
pfns[i++] = (iomap + addr) >> PAGE_SHIFT;
|
||||
vaddr = vmap_pfn(pfns, n_pfn, pgprot_writecombine(PAGE_KERNEL_IO));
|
||||
if (pfns != stack)
|
||||
kvfree(pfns);
|
||||
return vaddr;
|
||||
}
|
||||
|
||||
/* get, pin, and map the pages of the object into kernel space */
|
||||
|
@ -386,7 +368,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
|
|||
}
|
||||
|
||||
if (!ptr) {
|
||||
ptr = i915_gem_object_map(obj, type);
|
||||
if (GEM_WARN_ON(type == I915_MAP_WC &&
|
||||
!static_cpu_has(X86_FEATURE_PAT)))
|
||||
ptr = NULL;
|
||||
else if (i915_gem_object_has_struct_page(obj))
|
||||
ptr = i915_gem_object_map_page(obj, type);
|
||||
else
|
||||
ptr = i915_gem_object_map_pfn(obj, type);
|
||||
if (!ptr) {
|
||||
err = -ENOMEM;
|
||||
goto err_unpin;
|
||||
|
|
|
@ -49,80 +49,40 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
|
|||
return file;
|
||||
}
|
||||
|
||||
static size_t shmem_npte(struct file *file)
|
||||
{
|
||||
return file->f_mapping->host->i_size >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte)
|
||||
{
|
||||
unsigned long pfn;
|
||||
|
||||
vunmap(ptr);
|
||||
|
||||
for (pfn = 0; pfn < n_pte; pfn++) {
|
||||
struct page *page;
|
||||
|
||||
page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
|
||||
GFP_KERNEL);
|
||||
if (!WARN_ON(IS_ERR(page))) {
|
||||
put_page(page);
|
||||
put_page(page);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void *shmem_pin_map(struct file *file)
|
||||
{
|
||||
const size_t n_pte = shmem_npte(file);
|
||||
pte_t *stack[32], **ptes, **mem;
|
||||
struct vm_struct *area;
|
||||
unsigned long pfn;
|
||||
struct page **pages;
|
||||
size_t n_pages, i;
|
||||
void *vaddr;
|
||||
|
||||
mem = stack;
|
||||
if (n_pte > ARRAY_SIZE(stack)) {
|
||||
mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
|
||||
if (!mem)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
area = alloc_vm_area(n_pte << PAGE_SHIFT, mem);
|
||||
if (!area) {
|
||||
if (mem != stack)
|
||||
kvfree(mem);
|
||||
n_pages = file->f_mapping->host->i_size >> PAGE_SHIFT;
|
||||
pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
|
||||
if (!pages)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ptes = mem;
|
||||
for (pfn = 0; pfn < n_pte; pfn++) {
|
||||
struct page *page;
|
||||
|
||||
page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
|
||||
GFP_KERNEL);
|
||||
if (IS_ERR(page))
|
||||
for (i = 0; i < n_pages; i++) {
|
||||
pages[i] = shmem_read_mapping_page_gfp(file->f_mapping, i,
|
||||
GFP_KERNEL);
|
||||
if (IS_ERR(pages[i]))
|
||||
goto err_page;
|
||||
|
||||
**ptes++ = mk_pte(page, PAGE_KERNEL);
|
||||
}
|
||||
|
||||
if (mem != stack)
|
||||
kvfree(mem);
|
||||
|
||||
vaddr = vmap(pages, n_pages, VM_MAP_PUT_PAGES, PAGE_KERNEL);
|
||||
if (!vaddr)
|
||||
goto err_page;
|
||||
mapping_set_unevictable(file->f_mapping);
|
||||
return area->addr;
|
||||
|
||||
return vaddr;
|
||||
err_page:
|
||||
if (mem != stack)
|
||||
kvfree(mem);
|
||||
|
||||
__shmem_unpin_map(file, area->addr, pfn);
|
||||
while (--i >= 0)
|
||||
put_page(pages[i]);
|
||||
kvfree(pages);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void shmem_unpin_map(struct file *file, void *ptr)
|
||||
{
|
||||
mapping_clear_unevictable(file->f_mapping);
|
||||
__shmem_unpin_map(file, ptr, shmem_npte(file));
|
||||
vfree(ptr);
|
||||
}
|
||||
|
||||
static int __shmem_rw(struct file *file, loff_t off,
|
||||
|
|
|
@ -73,16 +73,13 @@ struct map_ring_valloc {
|
|||
struct xenbus_map_node *node;
|
||||
|
||||
/* Why do we need two arrays? See comment of __xenbus_map_ring */
|
||||
union {
|
||||
unsigned long addrs[XENBUS_MAX_RING_GRANTS];
|
||||
pte_t *ptes[XENBUS_MAX_RING_GRANTS];
|
||||
};
|
||||
unsigned long addrs[XENBUS_MAX_RING_GRANTS];
|
||||
phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
|
||||
|
||||
struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
|
||||
struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
|
||||
|
||||
unsigned int idx; /* HVM only. */
|
||||
unsigned int idx;
|
||||
};
|
||||
|
||||
static DEFINE_SPINLOCK(xenbus_valloc_lock);
|
||||
|
@ -686,6 +683,14 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
|
|||
EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
|
||||
|
||||
#ifdef CONFIG_XEN_PV
|
||||
static int map_ring_apply(pte_t *pte, unsigned long addr, void *data)
|
||||
{
|
||||
struct map_ring_valloc *info = data;
|
||||
|
||||
info->phys_addrs[info->idx++] = arbitrary_virt_to_machine(pte).maddr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int xenbus_map_ring_pv(struct xenbus_device *dev,
|
||||
struct map_ring_valloc *info,
|
||||
grant_ref_t *gnt_refs,
|
||||
|
@ -694,18 +699,15 @@ static int xenbus_map_ring_pv(struct xenbus_device *dev,
|
|||
{
|
||||
struct xenbus_map_node *node = info->node;
|
||||
struct vm_struct *area;
|
||||
int err = GNTST_okay;
|
||||
int i;
|
||||
bool leaked;
|
||||
bool leaked = false;
|
||||
int err = -ENOMEM;
|
||||
|
||||
area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, info->ptes);
|
||||
area = get_vm_area(XEN_PAGE_SIZE * nr_grefs, VM_IOREMAP);
|
||||
if (!area)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < nr_grefs; i++)
|
||||
info->phys_addrs[i] =
|
||||
arbitrary_virt_to_machine(info->ptes[i]).maddr;
|
||||
|
||||
if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
|
||||
XEN_PAGE_SIZE * nr_grefs, map_ring_apply, info))
|
||||
goto failed;
|
||||
err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
|
||||
info, GNTMAP_host_map | GNTMAP_contains_pte,
|
||||
&leaked);
|
||||
|
|
|
@ -310,7 +310,10 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
|
|||
* Grow the stack manually; some architectures have a limit on how
|
||||
* far ahead a user-space access may be in order to grow the stack.
|
||||
*/
|
||||
if (mmap_read_lock_killable(mm))
|
||||
return -EINTR;
|
||||
vma = find_extend_vma(mm, bprm->p);
|
||||
mmap_read_unlock(mm);
|
||||
if (!vma)
|
||||
return -EFAULT;
|
||||
|
||||
|
|
|
@ -842,13 +842,13 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
|
|||
struct buffer_head *bh, *head;
|
||||
gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
|
||||
long offset;
|
||||
struct mem_cgroup *memcg;
|
||||
struct mem_cgroup *memcg, *old_memcg;
|
||||
|
||||
if (retry)
|
||||
gfp |= __GFP_NOFAIL;
|
||||
|
||||
memcg = get_mem_cgroup_from_page(page);
|
||||
memalloc_use_memcg(memcg);
|
||||
old_memcg = set_active_memcg(memcg);
|
||||
|
||||
head = NULL;
|
||||
offset = PAGE_SIZE;
|
||||
|
@ -867,7 +867,7 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
|
|||
set_bh_page(bh, page, offset);
|
||||
}
|
||||
out:
|
||||
memalloc_unuse_memcg();
|
||||
set_active_memcg(old_memcg);
|
||||
mem_cgroup_put(memcg);
|
||||
return head;
|
||||
/*
|
||||
|
|
|
@ -3989,7 +3989,7 @@ static int io_madvise(struct io_kiocb *req, bool force_nonblock)
|
|||
if (force_nonblock)
|
||||
return -EAGAIN;
|
||||
|
||||
ret = do_madvise(ma->addr, ma->len, ma->advice);
|
||||
ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
|
||||
if (ret < 0)
|
||||
req_set_fail_links(req);
|
||||
io_req_complete(req, ret);
|
||||
|
|
|
@ -531,6 +531,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
|
|||
struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
|
||||
const struct path *path = fsnotify_data_path(data, data_type);
|
||||
unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
|
||||
struct mem_cgroup *old_memcg;
|
||||
struct inode *child = NULL;
|
||||
bool name_event = false;
|
||||
|
||||
|
@ -580,7 +581,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
|
|||
gfp |= __GFP_RETRY_MAYFAIL;
|
||||
|
||||
/* Whoever is interested in the event, pays for the allocation. */
|
||||
memalloc_use_memcg(group->memcg);
|
||||
old_memcg = set_active_memcg(group->memcg);
|
||||
|
||||
if (fanotify_is_perm_event(mask)) {
|
||||
event = fanotify_alloc_perm_event(path, gfp);
|
||||
|
@ -608,7 +609,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
|
|||
event->pid = get_pid(task_tgid(current));
|
||||
|
||||
out:
|
||||
memalloc_unuse_memcg();
|
||||
set_active_memcg(old_memcg);
|
||||
return event;
|
||||
}
|
||||
|
||||
|
|
|
@ -66,6 +66,7 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
|
|||
int ret;
|
||||
int len = 0;
|
||||
int alloc_len = sizeof(struct inotify_event_info);
|
||||
struct mem_cgroup *old_memcg;
|
||||
|
||||
if ((inode_mark->mask & FS_EXCL_UNLINK) &&
|
||||
path && d_unlinked(path->dentry))
|
||||
|
@ -87,9 +88,9 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
|
|||
* trigger OOM killer in the target monitoring memcg as it may have
|
||||
* security repercussion.
|
||||
*/
|
||||
memalloc_use_memcg(group->memcg);
|
||||
old_memcg = set_active_memcg(group->memcg);
|
||||
event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
|
||||
memalloc_unuse_memcg();
|
||||
set_active_memcg(old_memcg);
|
||||
|
||||
if (unlikely(!event)) {
|
||||
/*
|
||||
|
|
|
@ -1531,18 +1531,6 @@ static inline bool memcg_kmem_enabled(void)
|
|||
return static_branch_likely(&memcg_kmem_enabled_key);
|
||||
}
|
||||
|
||||
static inline bool memcg_kmem_bypass(void)
|
||||
{
|
||||
if (in_interrupt())
|
||||
return true;
|
||||
|
||||
/* Allow remote memcg charging in kthread contexts. */
|
||||
if ((!current->mm || (current->flags & PF_KTHREAD)) &&
|
||||
!current->active_memcg)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
|
||||
int order)
|
||||
{
|
||||
|
|
|
@ -2579,7 +2579,7 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
|
|||
struct list_head *uf, bool downgrade);
|
||||
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
|
||||
struct list_head *uf);
|
||||
extern int do_madvise(unsigned long start, size_t len_in, int behavior);
|
||||
extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
extern int __mm_populate(unsigned long addr, unsigned long len,
|
||||
|
|
|
@ -77,6 +77,7 @@ extern const struct file_operations pidfd_fops;
|
|||
struct file;
|
||||
|
||||
extern struct pid *pidfd_pid(const struct file *file);
|
||||
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
|
||||
|
||||
static inline struct pid *get_pid(struct pid *pid)
|
||||
{
|
||||
|
|
|
@ -279,39 +279,38 @@ static inline void memalloc_nocma_restore(unsigned int flags)
|
|||
#endif
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg);
|
||||
/**
|
||||
* memalloc_use_memcg - Starts the remote memcg charging scope.
|
||||
* set_active_memcg - Starts the remote memcg charging scope.
|
||||
* @memcg: memcg to charge.
|
||||
*
|
||||
* This function marks the beginning of the remote memcg charging scope. All the
|
||||
* __GFP_ACCOUNT allocations till the end of the scope will be charged to the
|
||||
* given memcg.
|
||||
*
|
||||
* NOTE: This function is not nesting safe.
|
||||
* NOTE: This function can nest. Users must save the return value and
|
||||
* reset the previous value after their own charging scope is over.
|
||||
*/
|
||||
static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
|
||||
static inline struct mem_cgroup *
|
||||
set_active_memcg(struct mem_cgroup *memcg)
|
||||
{
|
||||
WARN_ON_ONCE(current->active_memcg);
|
||||
current->active_memcg = memcg;
|
||||
}
|
||||
struct mem_cgroup *old;
|
||||
|
||||
/**
|
||||
* memalloc_unuse_memcg - Ends the remote memcg charging scope.
|
||||
*
|
||||
* This function marks the end of the remote memcg charging scope started by
|
||||
* memalloc_use_memcg().
|
||||
*/
|
||||
static inline void memalloc_unuse_memcg(void)
|
||||
{
|
||||
current->active_memcg = NULL;
|
||||
if (in_interrupt()) {
|
||||
old = this_cpu_read(int_active_memcg);
|
||||
this_cpu_write(int_active_memcg, memcg);
|
||||
} else {
|
||||
old = current->active_memcg;
|
||||
current->active_memcg = memcg;
|
||||
}
|
||||
|
||||
return old;
|
||||
}
|
||||
#else
|
||||
static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void memalloc_unuse_memcg(void)
|
||||
static inline struct mem_cgroup *
|
||||
set_active_memcg(struct mem_cgroup *memcg)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -879,6 +879,8 @@ asmlinkage long sys_munlockall(void);
|
|||
asmlinkage long sys_mincore(unsigned long start, size_t len,
|
||||
unsigned char __user * vec);
|
||||
asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
|
||||
asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
|
||||
size_t vlen, int behavior, unsigned int flags);
|
||||
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
|
||||
unsigned long prot, unsigned long pgoff,
|
||||
unsigned long flags);
|
||||
|
|
|
@ -24,6 +24,7 @@ struct notifier_block; /* in notifier.h */
|
|||
#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */
|
||||
#define VM_NO_GUARD 0x00000040 /* don't add guard page */
|
||||
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
|
||||
#define VM_MAP_PUT_PAGES 0x00000100 /* put pages and free array in vfree */
|
||||
|
||||
/*
|
||||
* VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC.
|
||||
|
@ -121,6 +122,7 @@ extern void vfree_atomic(const void *addr);
|
|||
|
||||
extern void *vmap(struct page **pages, unsigned int count,
|
||||
unsigned long flags, pgprot_t prot);
|
||||
void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot);
|
||||
extern void vunmap(const void *addr);
|
||||
|
||||
extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
|
||||
|
@ -167,6 +169,7 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
|
|||
unsigned long flags,
|
||||
unsigned long start, unsigned long end,
|
||||
const void *caller);
|
||||
void free_vm_area(struct vm_struct *area);
|
||||
extern struct vm_struct *remove_vm_area(const void *addr);
|
||||
extern struct vm_struct *find_vm_area(const void *addr);
|
||||
|
||||
|
@ -202,10 +205,6 @@ static inline void set_vm_flush_reset_perms(void *addr)
|
|||
}
|
||||
#endif
|
||||
|
||||
/* Allocate/destroy a 'vmalloc' VM area. */
|
||||
extern struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes);
|
||||
extern void free_vm_area(struct vm_struct *area);
|
||||
|
||||
/* for /dev/kmem */
|
||||
extern long vread(char *buf, char *addr, unsigned long count);
|
||||
extern long vwrite(char *buf, char *addr, unsigned long count);
|
||||
|
|
|
@ -857,9 +857,11 @@ __SYSCALL(__NR_openat2, sys_openat2)
|
|||
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
|
||||
#define __NR_faccessat2 439
|
||||
__SYSCALL(__NR_faccessat2, sys_faccessat2)
|
||||
#define __NR_process_madvise 440
|
||||
__SYSCALL(__NR_process_madvise, sys_process_madvise)
|
||||
|
||||
#undef __NR_syscalls
|
||||
#define __NR_syscalls 440
|
||||
#define __NR_syscalls 441
|
||||
|
||||
/*
|
||||
* 32 bit systems traditionally used different
|
||||
|
|
|
@ -1474,25 +1474,6 @@ end:
|
|||
return retval;
|
||||
}
|
||||
|
||||
static struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
|
||||
{
|
||||
struct fd f;
|
||||
struct pid *pid;
|
||||
|
||||
f = fdget(fd);
|
||||
if (!f.file)
|
||||
return ERR_PTR(-EBADF);
|
||||
|
||||
pid = pidfd_pid(f.file);
|
||||
if (!IS_ERR(pid)) {
|
||||
get_pid(pid);
|
||||
*flags = f.file->f_flags;
|
||||
}
|
||||
|
||||
fdput(f);
|
||||
return pid;
|
||||
}
|
||||
|
||||
static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
|
||||
int options, struct rusage *ru)
|
||||
{
|
||||
|
|
19
kernel/pid.c
19
kernel/pid.c
|
@ -520,6 +520,25 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
|
|||
return idr_get_next(&ns->idr, &nr);
|
||||
}
|
||||
|
||||
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
|
||||
{
|
||||
struct fd f;
|
||||
struct pid *pid;
|
||||
|
||||
f = fdget(fd);
|
||||
if (!f.file)
|
||||
return ERR_PTR(-EBADF);
|
||||
|
||||
pid = pidfd_pid(f.file);
|
||||
if (!IS_ERR(pid)) {
|
||||
get_pid(pid);
|
||||
*flags = f.file->f_flags;
|
||||
}
|
||||
|
||||
fdput(f);
|
||||
return pid;
|
||||
}
|
||||
|
||||
/**
|
||||
* pidfd_create() - Create a new pid file descriptor.
|
||||
*
|
||||
|
|
|
@ -280,6 +280,7 @@ COND_SYSCALL(mlockall);
|
|||
COND_SYSCALL(munlockall);
|
||||
COND_SYSCALL(mincore);
|
||||
COND_SYSCALL(madvise);
|
||||
COND_SYSCALL(process_madvise);
|
||||
COND_SYSCALL(remap_file_pages);
|
||||
COND_SYSCALL(mbind);
|
||||
COND_SYSCALL_COMPAT(mbind);
|
||||
|
|
|
@ -816,6 +816,9 @@ config DEVICE_PRIVATE
|
|||
memory; i.e., memory that is only accessible from the device (or
|
||||
group of devices). You likely also want to select HMM_MIRROR.
|
||||
|
||||
config VMAP_PFN
|
||||
bool
|
||||
|
||||
config FRAME_VECTOR
|
||||
bool
|
||||
|
||||
|
|
|
@ -72,6 +72,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
|
|||
int nr;
|
||||
struct page **pages;
|
||||
int ret = 0;
|
||||
bool needs_mmap_lock =
|
||||
cmd != GUP_FAST_BENCHMARK && cmd != PIN_FAST_BENCHMARK;
|
||||
|
||||
if (gup->size > ULONG_MAX)
|
||||
return -EINVAL;
|
||||
|
@ -81,6 +83,11 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
|
|||
if (!pages)
|
||||
return -ENOMEM;
|
||||
|
||||
if (needs_mmap_lock && mmap_read_lock_killable(current->mm)) {
|
||||
ret = -EINTR;
|
||||
goto free_pages;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
nr = gup->nr_pages_per_call;
|
||||
start_time = ktime_get();
|
||||
|
@ -120,9 +127,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
|
|||
pages + i, NULL);
|
||||
break;
|
||||
default:
|
||||
kvfree(pages);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (nr <= 0)
|
||||
|
@ -150,8 +156,11 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
|
|||
end_time = ktime_get();
|
||||
gup->put_delta_usec = ktime_us_delta(end_time, start_time);
|
||||
|
||||
unlock:
|
||||
if (needs_mmap_lock)
|
||||
mmap_read_unlock(current->mm);
|
||||
free_pages:
|
||||
kvfree(pages);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
125
mm/madvise.c
125
mm/madvise.c
|
@ -17,6 +17,8 @@
|
|||
#include <linux/falloc.h>
|
||||
#include <linux/fadvise.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/ksm.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/file.h>
|
||||
|
@ -27,7 +29,6 @@
|
|||
#include <linux/swapops.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#include <asm/tlb.h>
|
||||
|
||||
|
@ -258,6 +259,7 @@ static long madvise_willneed(struct vm_area_struct *vma,
|
|||
struct vm_area_struct **prev,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
struct file *file = vma->vm_file;
|
||||
loff_t offset;
|
||||
|
||||
|
@ -294,10 +296,10 @@ static long madvise_willneed(struct vm_area_struct *vma,
|
|||
get_file(file);
|
||||
offset = (loff_t)(start - vma->vm_start)
|
||||
+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
|
||||
mmap_read_unlock(current->mm);
|
||||
mmap_read_unlock(mm);
|
||||
vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED);
|
||||
fput(file);
|
||||
mmap_read_lock(current->mm);
|
||||
mmap_read_lock(mm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -766,6 +768,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
|
|||
unsigned long start, unsigned long end,
|
||||
int behavior)
|
||||
{
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
|
||||
*prev = vma;
|
||||
if (!can_madv_lru_vma(vma))
|
||||
return -EINVAL;
|
||||
|
@ -773,8 +777,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
|
|||
if (!userfaultfd_remove(vma, start, end)) {
|
||||
*prev = NULL; /* mmap_lock has been dropped, prev is stale */
|
||||
|
||||
mmap_read_lock(current->mm);
|
||||
vma = find_vma(current->mm, start);
|
||||
mmap_read_lock(mm);
|
||||
vma = find_vma(mm, start);
|
||||
if (!vma)
|
||||
return -ENOMEM;
|
||||
if (start < vma->vm_start) {
|
||||
|
@ -828,6 +832,7 @@ static long madvise_remove(struct vm_area_struct *vma,
|
|||
loff_t offset;
|
||||
int error;
|
||||
struct file *f;
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
|
||||
*prev = NULL; /* tell sys_madvise we drop mmap_lock */
|
||||
|
||||
|
@ -855,13 +860,13 @@ static long madvise_remove(struct vm_area_struct *vma,
|
|||
get_file(f);
|
||||
if (userfaultfd_remove(vma, start, end)) {
|
||||
/* mmap_lock was not released by userfaultfd_remove() */
|
||||
mmap_read_unlock(current->mm);
|
||||
mmap_read_unlock(mm);
|
||||
}
|
||||
error = vfs_fallocate(f,
|
||||
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
|
||||
offset, end - start);
|
||||
fput(f);
|
||||
mmap_read_lock(current->mm);
|
||||
mmap_read_lock(mm);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -984,6 +989,18 @@ madvise_behavior_valid(int behavior)
|
|||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
process_madvise_behavior_valid(int behavior)
|
||||
{
|
||||
switch (behavior) {
|
||||
case MADV_COLD:
|
||||
case MADV_PAGEOUT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The madvise(2) system call.
|
||||
*
|
||||
|
@ -1031,6 +1048,11 @@ madvise_behavior_valid(int behavior)
|
|||
* MADV_DONTDUMP - the application wants to prevent pages in the given range
|
||||
* from being included in its core dump.
|
||||
* MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump.
|
||||
* MADV_COLD - the application is not expected to use this memory soon,
|
||||
* deactivate pages in this range so that they can be reclaimed
|
||||
* easily if memory pressure hanppens.
|
||||
* MADV_PAGEOUT - the application is not expected to use this memory soon,
|
||||
* page out the pages in this range immediately.
|
||||
*
|
||||
* return values:
|
||||
* zero - success
|
||||
|
@ -1045,7 +1067,7 @@ madvise_behavior_valid(int behavior)
|
|||
* -EBADF - map exists, but area maps something that isn't a file.
|
||||
* -EAGAIN - a kernel resource was temporarily unavailable.
|
||||
*/
|
||||
int do_madvise(unsigned long start, size_t len_in, int behavior)
|
||||
int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior)
|
||||
{
|
||||
unsigned long end, tmp;
|
||||
struct vm_area_struct *vma, *prev;
|
||||
|
@ -1083,10 +1105,10 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
|
|||
|
||||
write = madvise_need_mmap_write(behavior);
|
||||
if (write) {
|
||||
if (mmap_write_lock_killable(current->mm))
|
||||
if (mmap_write_lock_killable(mm))
|
||||
return -EINTR;
|
||||
} else {
|
||||
mmap_read_lock(current->mm);
|
||||
mmap_read_lock(mm);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1094,7 +1116,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
|
|||
* ranges, just ignore them, but return -ENOMEM at the end.
|
||||
* - different from the way of handling in mlock etc.
|
||||
*/
|
||||
vma = find_vma_prev(current->mm, start, &prev);
|
||||
vma = find_vma_prev(mm, start, &prev);
|
||||
if (vma && start > vma->vm_start)
|
||||
prev = vma;
|
||||
|
||||
|
@ -1131,19 +1153,92 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
|
|||
if (prev)
|
||||
vma = prev->vm_next;
|
||||
else /* madvise_remove dropped mmap_lock */
|
||||
vma = find_vma(current->mm, start);
|
||||
vma = find_vma(mm, start);
|
||||
}
|
||||
out:
|
||||
blk_finish_plug(&plug);
|
||||
if (write)
|
||||
mmap_write_unlock(current->mm);
|
||||
mmap_write_unlock(mm);
|
||||
else
|
||||
mmap_read_unlock(current->mm);
|
||||
mmap_read_unlock(mm);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
|
||||
{
|
||||
return do_madvise(start, len_in, behavior);
|
||||
return do_madvise(current->mm, start, len_in, behavior);
|
||||
}
|
||||
|
||||
SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
|
||||
size_t, vlen, int, behavior, unsigned int, flags)
|
||||
{
|
||||
ssize_t ret;
|
||||
struct iovec iovstack[UIO_FASTIOV], iovec;
|
||||
struct iovec *iov = iovstack;
|
||||
struct iov_iter iter;
|
||||
struct pid *pid;
|
||||
struct task_struct *task;
|
||||
struct mm_struct *mm;
|
||||
size_t total_len;
|
||||
unsigned int f_flags;
|
||||
|
||||
if (flags != 0) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
pid = pidfd_get_pid(pidfd, &f_flags);
|
||||
if (IS_ERR(pid)) {
|
||||
ret = PTR_ERR(pid);
|
||||
goto free_iov;
|
||||
}
|
||||
|
||||
task = get_pid_task(pid, PIDTYPE_PID);
|
||||
if (!task) {
|
||||
ret = -ESRCH;
|
||||
goto put_pid;
|
||||
}
|
||||
|
||||
if (task->mm != current->mm &&
|
||||
!process_madvise_behavior_valid(behavior)) {
|
||||
ret = -EINVAL;
|
||||
goto release_task;
|
||||
}
|
||||
|
||||
mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
|
||||
if (IS_ERR_OR_NULL(mm)) {
|
||||
ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
|
||||
goto release_task;
|
||||
}
|
||||
|
||||
total_len = iov_iter_count(&iter);
|
||||
|
||||
while (iov_iter_count(&iter)) {
|
||||
iovec = iov_iter_iovec(&iter);
|
||||
ret = do_madvise(mm, (unsigned long)iovec.iov_base,
|
||||
iovec.iov_len, behavior);
|
||||
if (ret < 0)
|
||||
break;
|
||||
iov_iter_advance(&iter, iovec.iov_len);
|
||||
}
|
||||
|
||||
if (ret == 0)
|
||||
ret = total_len - iov_iter_count(&iter);
|
||||
|
||||
mmput(mm);
|
||||
return ret;
|
||||
|
||||
release_task:
|
||||
put_task_struct(task);
|
||||
put_pid:
|
||||
put_pid(pid);
|
||||
free_iov:
|
||||
kfree(iov);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -73,6 +73,9 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
|
|||
|
||||
struct mem_cgroup *root_mem_cgroup __read_mostly;
|
||||
|
||||
/* Active memory cgroup to use from an interrupt context */
|
||||
DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
|
||||
|
||||
/* Socket memory accounting disabled? */
|
||||
static bool cgroup_memory_nosocket;
|
||||
|
||||
|
@ -1061,23 +1064,56 @@ struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
|
|||
}
|
||||
EXPORT_SYMBOL(get_mem_cgroup_from_page);
|
||||
|
||||
/**
|
||||
* If current->active_memcg is non-NULL, do not fallback to current->mm->memcg.
|
||||
*/
|
||||
static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
|
||||
static __always_inline struct mem_cgroup *active_memcg(void)
|
||||
{
|
||||
if (unlikely(current->active_memcg)) {
|
||||
struct mem_cgroup *memcg;
|
||||
if (in_interrupt())
|
||||
return this_cpu_read(int_active_memcg);
|
||||
else
|
||||
return current->active_memcg;
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
static __always_inline struct mem_cgroup *get_active_memcg(void)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
rcu_read_lock();
|
||||
memcg = active_memcg();
|
||||
if (memcg) {
|
||||
/* current->active_memcg must hold a ref. */
|
||||
if (WARN_ON_ONCE(!css_tryget(¤t->active_memcg->css)))
|
||||
if (WARN_ON_ONCE(!css_tryget(&memcg->css)))
|
||||
memcg = root_mem_cgroup;
|
||||
else
|
||||
memcg = current->active_memcg;
|
||||
rcu_read_unlock();
|
||||
return memcg;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return memcg;
|
||||
}
|
||||
|
||||
static __always_inline bool memcg_kmem_bypass(void)
|
||||
{
|
||||
/* Allow remote memcg charging from any context. */
|
||||
if (unlikely(active_memcg()))
|
||||
return false;
|
||||
|
||||
/* Memcg to charge can't be determined. */
|
||||
if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* If active memcg is set, do not fallback to current->mm->memcg.
|
||||
*/
|
||||
static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
|
||||
{
|
||||
if (memcg_kmem_bypass())
|
||||
return NULL;
|
||||
|
||||
if (unlikely(active_memcg()))
|
||||
return get_active_memcg();
|
||||
|
||||
return get_mem_cgroup_from_mm(current->mm);
|
||||
}
|
||||
|
||||
|
@ -2933,12 +2969,12 @@ __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
|
|||
struct obj_cgroup *objcg = NULL;
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
if (unlikely(!current->mm && !current->active_memcg))
|
||||
if (memcg_kmem_bypass())
|
||||
return NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
if (unlikely(current->active_memcg))
|
||||
memcg = rcu_dereference(current->active_memcg);
|
||||
if (unlikely(active_memcg()))
|
||||
memcg = active_memcg();
|
||||
else
|
||||
memcg = mem_cgroup_from_task(current);
|
||||
|
||||
|
@ -3059,19 +3095,16 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
|
|||
struct mem_cgroup *memcg;
|
||||
int ret = 0;
|
||||
|
||||
if (memcg_kmem_bypass())
|
||||
return 0;
|
||||
|
||||
memcg = get_mem_cgroup_from_current();
|
||||
if (!mem_cgroup_is_root(memcg)) {
|
||||
if (memcg && !mem_cgroup_is_root(memcg)) {
|
||||
ret = __memcg_kmem_charge(memcg, gfp, 1 << order);
|
||||
if (!ret) {
|
||||
page->mem_cgroup = memcg;
|
||||
__SetPageKmemcg(page);
|
||||
return 0;
|
||||
}
|
||||
css_put(&memcg->css);
|
||||
}
|
||||
css_put(&memcg->css);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -5290,12 +5323,12 @@ static struct cgroup_subsys_state * __ref
|
|||
mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||
{
|
||||
struct mem_cgroup *parent = mem_cgroup_from_css(parent_css);
|
||||
struct mem_cgroup *memcg;
|
||||
struct mem_cgroup *memcg, *old_memcg;
|
||||
long error = -ENOMEM;
|
||||
|
||||
memalloc_use_memcg(parent);
|
||||
old_memcg = set_active_memcg(parent);
|
||||
memcg = mem_cgroup_alloc();
|
||||
memalloc_unuse_memcg();
|
||||
set_active_memcg(old_memcg);
|
||||
if (IS_ERR(memcg))
|
||||
return ERR_CAST(memcg);
|
||||
|
||||
|
|
|
@ -1673,16 +1673,6 @@ int unpoison_memory(unsigned long pfn)
|
|||
}
|
||||
EXPORT_SYMBOL(unpoison_memory);
|
||||
|
||||
static struct page *new_page(struct page *p, unsigned long private)
|
||||
{
|
||||
struct migration_target_control mtc = {
|
||||
.nid = page_to_nid(p),
|
||||
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
|
||||
};
|
||||
|
||||
return alloc_migration_target(p, (unsigned long)&mtc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Safely get reference count of an arbitrary page.
|
||||
* Returns 0 for a free page, -EIO for a zero refcount page
|
||||
|
@ -1797,6 +1787,10 @@ static int __soft_offline_page(struct page *page)
|
|||
char const *msg_page[] = {"page", "hugepage"};
|
||||
bool huge = PageHuge(page);
|
||||
LIST_HEAD(pagelist);
|
||||
struct migration_target_control mtc = {
|
||||
.nid = NUMA_NO_NODE,
|
||||
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
|
||||
};
|
||||
|
||||
/*
|
||||
* Check PageHWPoison again inside page lock because PageHWPoison
|
||||
|
@ -1833,8 +1827,8 @@ static int __soft_offline_page(struct page *page)
|
|||
}
|
||||
|
||||
if (isolate_page(hpage, &pagelist)) {
|
||||
ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
|
||||
MIGRATE_SYNC, MR_MEMORY_FAILURE);
|
||||
ret = migrate_pages(&pagelist, alloc_migration_target, NULL,
|
||||
(unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_FAILURE);
|
||||
if (!ret) {
|
||||
bool release = !huge;
|
||||
|
||||
|
|
16
mm/memory.c
16
mm/memory.c
|
@ -2391,13 +2391,15 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
|
|||
|
||||
arch_enter_lazy_mmu_mode();
|
||||
|
||||
do {
|
||||
if (create || !pte_none(*pte)) {
|
||||
err = fn(pte++, addr, data);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
if (fn) {
|
||||
do {
|
||||
if (create || !pte_none(*pte)) {
|
||||
err = fn(pte++, addr, data);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
}
|
||||
*mask |= PGTBL_PTE_MODIFIED;
|
||||
|
||||
arch_leave_lazy_mmu_mode();
|
||||
|
|
|
@ -1290,27 +1290,6 @@ found:
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct page *new_node_page(struct page *page, unsigned long private)
|
||||
{
|
||||
nodemask_t nmask = node_states[N_MEMORY];
|
||||
struct migration_target_control mtc = {
|
||||
.nid = page_to_nid(page),
|
||||
.nmask = &nmask,
|
||||
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
|
||||
};
|
||||
|
||||
/*
|
||||
* try to allocate from a different node but reuse this node if there
|
||||
* are no other online nodes to be used (e.g. we are offlining a part
|
||||
* of the only existing node)
|
||||
*/
|
||||
node_clear(mtc.nid, nmask);
|
||||
if (nodes_empty(nmask))
|
||||
node_set(mtc.nid, nmask);
|
||||
|
||||
return alloc_migration_target(page, (unsigned long)&mtc);
|
||||
}
|
||||
|
||||
static int
|
||||
do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
|
||||
{
|
||||
|
@ -1370,9 +1349,28 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
|
|||
put_page(page);
|
||||
}
|
||||
if (!list_empty(&source)) {
|
||||
/* Allocate a new page from the nearest neighbor node */
|
||||
ret = migrate_pages(&source, new_node_page, NULL, 0,
|
||||
MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
|
||||
nodemask_t nmask = node_states[N_MEMORY];
|
||||
struct migration_target_control mtc = {
|
||||
.nmask = &nmask,
|
||||
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
|
||||
};
|
||||
|
||||
/*
|
||||
* We have checked that migration range is on a single zone so
|
||||
* we can use the nid of the first page to all the others.
|
||||
*/
|
||||
mtc.nid = page_to_nid(list_first_entry(&source, struct page, lru));
|
||||
|
||||
/*
|
||||
* try to allocate from a different node but reuse this node
|
||||
* if there are no other online nodes to be used (e.g. we are
|
||||
* offlining a part of the only existing node)
|
||||
*/
|
||||
node_clear(mtc.nid, nmask);
|
||||
if (nodes_empty(nmask))
|
||||
node_set(mtc.nid, nmask);
|
||||
ret = migrate_pages(&source, alloc_migration_target, NULL,
|
||||
(unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
|
||||
if (ret) {
|
||||
list_for_each_entry(page, &source, lru) {
|
||||
pr_warn("migrating pfn %lx failed ret:%d ",
|
||||
|
|
85
mm/migrate.c
85
mm/migrate.c
|
@ -1864,6 +1864,53 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
|
|||
return nr_pages ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static struct mm_struct *find_mm_struct(pid_t pid, nodemask_t *mem_nodes)
|
||||
{
|
||||
struct task_struct *task;
|
||||
struct mm_struct *mm;
|
||||
|
||||
/*
|
||||
* There is no need to check if current process has the right to modify
|
||||
* the specified process when they are same.
|
||||
*/
|
||||
if (!pid) {
|
||||
mmget(current->mm);
|
||||
*mem_nodes = cpuset_mems_allowed(current);
|
||||
return current->mm;
|
||||
}
|
||||
|
||||
/* Find the mm_struct */
|
||||
rcu_read_lock();
|
||||
task = find_task_by_vpid(pid);
|
||||
if (!task) {
|
||||
rcu_read_unlock();
|
||||
return ERR_PTR(-ESRCH);
|
||||
}
|
||||
get_task_struct(task);
|
||||
|
||||
/*
|
||||
* Check if this process has the right to modify the specified
|
||||
* process. Use the regular "ptrace_may_access()" checks.
|
||||
*/
|
||||
if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
|
||||
rcu_read_unlock();
|
||||
mm = ERR_PTR(-EPERM);
|
||||
goto out;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
mm = ERR_PTR(security_task_movememory(task));
|
||||
if (IS_ERR(mm))
|
||||
goto out;
|
||||
*mem_nodes = cpuset_mems_allowed(task);
|
||||
mm = get_task_mm(task);
|
||||
out:
|
||||
put_task_struct(task);
|
||||
if (!mm)
|
||||
mm = ERR_PTR(-EINVAL);
|
||||
return mm;
|
||||
}
|
||||
|
||||
/*
|
||||
* Move a list of pages in the address space of the currently executing
|
||||
* process.
|
||||
|
@ -1873,7 +1920,6 @@ static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
|
|||
const int __user *nodes,
|
||||
int __user *status, int flags)
|
||||
{
|
||||
struct task_struct *task;
|
||||
struct mm_struct *mm;
|
||||
int err;
|
||||
nodemask_t task_nodes;
|
||||
|
@ -1885,36 +1931,9 @@ static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
|
|||
if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
|
||||
return -EPERM;
|
||||
|
||||
/* Find the mm_struct */
|
||||
rcu_read_lock();
|
||||
task = pid ? find_task_by_vpid(pid) : current;
|
||||
if (!task) {
|
||||
rcu_read_unlock();
|
||||
return -ESRCH;
|
||||
}
|
||||
get_task_struct(task);
|
||||
|
||||
/*
|
||||
* Check if this process has the right to modify the specified
|
||||
* process. Use the regular "ptrace_may_access()" checks.
|
||||
*/
|
||||
if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
|
||||
rcu_read_unlock();
|
||||
err = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
err = security_task_movememory(task);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
task_nodes = cpuset_mems_allowed(task);
|
||||
mm = get_task_mm(task);
|
||||
put_task_struct(task);
|
||||
|
||||
if (!mm)
|
||||
return -EINVAL;
|
||||
mm = find_mm_struct(pid, &task_nodes);
|
||||
if (IS_ERR(mm))
|
||||
return PTR_ERR(mm);
|
||||
|
||||
if (nodes)
|
||||
err = do_pages_move(mm, task_nodes, nr_pages, pages,
|
||||
|
@ -1924,10 +1943,6 @@ static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
|
|||
|
||||
mmput(mm);
|
||||
return err;
|
||||
|
||||
out:
|
||||
put_task_struct(task);
|
||||
return err;
|
||||
}
|
||||
|
||||
SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
|
||||
|
|
74
mm/mmap.c
74
mm/mmap.c
|
@ -558,6 +558,50 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* vma_next() - Get the next VMA.
|
||||
* @mm: The mm_struct.
|
||||
* @vma: The current vma.
|
||||
*
|
||||
* If @vma is NULL, return the first vma in the mm.
|
||||
*
|
||||
* Returns: The next VMA after @vma.
|
||||
*/
|
||||
static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
if (!vma)
|
||||
return mm->mmap;
|
||||
|
||||
return vma->vm_next;
|
||||
}
|
||||
|
||||
/*
|
||||
* munmap_vma_range() - munmap VMAs that overlap a range.
|
||||
* @mm: The mm struct
|
||||
* @start: The start of the range.
|
||||
* @len: The length of the range.
|
||||
* @pprev: pointer to the pointer that will be set to previous vm_area_struct
|
||||
* @rb_link: the rb_node
|
||||
* @rb_parent: the parent rb_node
|
||||
*
|
||||
* Find all the vm_area_struct that overlap from @start to
|
||||
* @end and munmap them. Set @pprev to the previous vm_area_struct.
|
||||
*
|
||||
* Returns: -ENOMEM on munmap failure or 0 on success.
|
||||
*/
|
||||
static inline int
|
||||
munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
|
||||
struct vm_area_struct **pprev, struct rb_node ***link,
|
||||
struct rb_node **parent, struct list_head *uf)
|
||||
{
|
||||
|
||||
while (find_vma_links(mm, start, start + len, pprev, link, parent))
|
||||
if (do_munmap(mm, start, len, uf))
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
static unsigned long count_vma_pages_range(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long end)
|
||||
{
|
||||
|
@ -1128,10 +1172,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
|
|||
if (vm_flags & VM_SPECIAL)
|
||||
return NULL;
|
||||
|
||||
if (prev)
|
||||
next = prev->vm_next;
|
||||
else
|
||||
next = mm->mmap;
|
||||
next = vma_next(mm, prev);
|
||||
area = next;
|
||||
if (area && area->vm_end == end) /* cases 6, 7, 8 */
|
||||
next = next->vm_next;
|
||||
|
@ -1707,13 +1748,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Clear old maps */
|
||||
while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
|
||||
&rb_parent)) {
|
||||
if (do_munmap(mm, addr, len, uf))
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Clear old maps, set up prev, rb_link, rb_parent, and uf */
|
||||
if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
|
||||
return -ENOMEM;
|
||||
/*
|
||||
* Private writable mapping: check memory availability
|
||||
*/
|
||||
|
@ -2632,7 +2669,7 @@ static void unmap_region(struct mm_struct *mm,
|
|||
struct vm_area_struct *vma, struct vm_area_struct *prev,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
|
||||
struct vm_area_struct *next = vma_next(mm, prev);
|
||||
struct mmu_gather tlb;
|
||||
|
||||
lru_add_drain();
|
||||
|
@ -2831,7 +2868,7 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
|
|||
if (error)
|
||||
return error;
|
||||
}
|
||||
vma = prev ? prev->vm_next : mm->mmap;
|
||||
vma = vma_next(mm, prev);
|
||||
|
||||
if (unlikely(uf)) {
|
||||
/*
|
||||
|
@ -3049,14 +3086,9 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
|
|||
if (error)
|
||||
return error;
|
||||
|
||||
/*
|
||||
* Clear old maps. this also does some error checking for us
|
||||
*/
|
||||
while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
|
||||
&rb_parent)) {
|
||||
if (do_munmap(mm, addr, len, uf))
|
||||
return -ENOMEM;
|
||||
}
|
||||
/* Clear old maps, set up prev, rb_link, rb_parent, and uf */
|
||||
if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
|
||||
return -ENOMEM;
|
||||
|
||||
/* Check against address space limits *after* clearing old maps... */
|
||||
if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
|
||||
|
|
|
@ -354,13 +354,6 @@ void vm_unmap_aliases(void)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(vm_unmap_aliases);
|
||||
|
||||
struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
|
||||
{
|
||||
BUG();
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alloc_vm_area);
|
||||
|
||||
void free_vm_area(struct vm_struct *area)
|
||||
{
|
||||
BUG();
|
||||
|
|
|
@ -1584,8 +1584,7 @@ static enum pcpu_chunk_type pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
|
|||
{
|
||||
struct obj_cgroup *objcg;
|
||||
|
||||
if (!memcg_kmem_enabled() || !(gfp & __GFP_ACCOUNT) ||
|
||||
memcg_kmem_bypass())
|
||||
if (!memcg_kmem_enabled() || !(gfp & __GFP_ACCOUNT))
|
||||
return PCPU_CHUNK_ROOT;
|
||||
|
||||
objcg = get_obj_cgroup_from_current();
|
||||
|
|
|
@ -280,9 +280,6 @@ static inline struct obj_cgroup *memcg_slab_pre_alloc_hook(struct kmem_cache *s,
|
|||
{
|
||||
struct obj_cgroup *objcg;
|
||||
|
||||
if (memcg_kmem_bypass())
|
||||
return NULL;
|
||||
|
||||
objcg = get_obj_cgroup_from_current();
|
||||
if (!objcg)
|
||||
return NULL;
|
||||
|
|
147
mm/vmalloc.c
147
mm/vmalloc.c
|
@ -1,7 +1,5 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* linux/mm/vmalloc.c
|
||||
*
|
||||
* Copyright (C) 1993 Linus Torvalds
|
||||
* Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
|
||||
* SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
|
||||
|
@@ -2321,20 +2319,21 @@ static void __vfree(const void *addr)
 }
 
 /**
- * vfree - release memory allocated by vmalloc()
- * @addr:  memory base address
+ * vfree - Release memory allocated by vmalloc()
+ * @addr:  Memory base address
  *
- * Free the virtually continuous memory area starting at @addr, as
- * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
- * NULL, no operation is performed.
+ * Free the virtually continuous memory area starting at @addr, as obtained
+ * from one of the vmalloc() family of APIs.  This will usually also free the
+ * physical memory underlying the virtual allocation, but that memory is
+ * reference counted, so it will not be freed until the last user goes away.
 *
- * Must not be called in NMI context (strictly speaking, only if we don't
- * have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
- * conventions for vfree() arch-depenedent would be a really bad idea)
+ * If @addr is NULL, no operation is performed.
 *
+ * Context:
 * May sleep if called *not* from interrupt context.
- *
- * NOTE: assumes that the object at @addr has a size >= sizeof(llist_node)
+ * Must not be called in NMI context (strictly speaking, it could be
+ * if we have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
+ * conventions for vfree() arch-depenedent would be a really bad idea).
 */
 void vfree(const void *addr)
 {
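
The updated kernel-doc above spells out the vfree() contract (may sleep outside interrupt context, NULL is a no-op, physical memory is dropped with the last reference). A minimal usage sketch, not part of this commit; the function name is illustrative:

#include <linux/mm.h>
#include <linux/vmalloc.h>

/* Illustrative only: allocate a virtually contiguous buffer, then release it. */
static int example_vmalloc_user(void)
{
	void *buf = vmalloc(64 * PAGE_SIZE);	/* may sleep */

	if (!buf)
		return -ENOMEM;
	/* ... use buf ... */
	vfree(buf);	/* passing NULL here would be a no-op, per the comment above */
	return 0;
}
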
@@ -2376,8 +2375,11 @@ EXPORT_SYMBOL(vunmap);
 * @flags: vm_area->flags
 * @prot: page protection for the mapping
 *
- * Maps @count pages from @pages into contiguous kernel virtual
- * space.
+ * Maps @count pages from @pages into contiguous kernel virtual space.
+ * If @flags contains %VM_MAP_PUT_PAGES the ownership of the pages array itself
+ * (which must be kmalloc or vmalloc memory) and one reference per pages in it
+ * are transferred from the caller to vmap(), and will be freed / dropped when
+ * vfree() is called on the return value.
 *
 * Return: the address of the area or %NULL on failure
 */
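
The new %VM_MAP_PUT_PAGES semantics documented above let a caller hand both the page references and the pages array itself over to the mapping, so a single vfree() later tears everything down. A hedged usage sketch (the function name and allocation pattern are illustrative, not taken from this commit):

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Illustrative only: build an n-page buffer whose lifetime is tied to vfree(). */
static void *example_alloc_mapped_pages(unsigned int n)
{
	struct page **pages;
	void *vaddr;
	unsigned int i;

	/* The array must be kmalloc or vmalloc memory for VM_MAP_PUT_PAGES. */
	pages = kvmalloc_array(n, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return NULL;

	for (i = 0; i < n; i++) {
		pages[i] = alloc_page(GFP_KERNEL);
		if (!pages[i])
			goto err;
	}

	/* Ownership of @pages and one ref per page transfers on success. */
	vaddr = vmap(pages, n, VM_MAP | VM_MAP_PUT_PAGES, PAGE_KERNEL);
	if (!vaddr)
		goto err;
	return vaddr;	/* later: vfree(vaddr) drops the pages and frees the array */

err:
	while (i--)
		__free_page(pages[i]);
	kvfree(pages);
	return NULL;
}
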
@@ -2403,28 +2405,73 @@ void *vmap(struct page **pages, unsigned int count,
 		return NULL;
 	}
 
+	if (flags & VM_MAP_PUT_PAGES)
+		area->pages = pages;
 	return area->addr;
 }
 EXPORT_SYMBOL(vmap);
 
+#ifdef CONFIG_VMAP_PFN
+struct vmap_pfn_data {
+	unsigned long	*pfns;
+	pgprot_t	prot;
+	unsigned int	idx;
+};
+
+static int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private)
+{
+	struct vmap_pfn_data *data = private;
+
+	if (WARN_ON_ONCE(pfn_valid(data->pfns[data->idx])))
+		return -EINVAL;
+	*pte = pte_mkspecial(pfn_pte(data->pfns[data->idx++], data->prot));
+	return 0;
+}
+
+/**
+ * vmap_pfn - map an array of PFNs into virtually contiguous space
+ * @pfns: array of PFNs
+ * @count: number of pages to map
+ * @prot: page protection for the mapping
+ *
+ * Maps @count PFNs from @pfns into contiguous kernel virtual space and returns
+ * the start address of the mapping.
+ */
+void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
+{
+	struct vmap_pfn_data data = { .pfns = pfns, .prot = pgprot_nx(prot) };
+	struct vm_struct *area;
+
+	area = get_vm_area_caller(count * PAGE_SIZE, VM_IOREMAP,
+			__builtin_return_address(0));
+	if (!area)
+		return NULL;
+	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
+			count * PAGE_SIZE, vmap_pfn_apply, &data)) {
+		free_vm_area(area);
+		return NULL;
+	}
+	return area->addr;
+}
+EXPORT_SYMBOL_GPL(vmap_pfn);
+#endif /* CONFIG_VMAP_PFN */
+
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 pgprot_t prot, int node)
 {
-	struct page **pages;
-	unsigned int nr_pages, array_size, i;
 	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
-	const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
-	const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ?
-					0 :
-					__GFP_HIGHMEM;
+	unsigned int nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
+	unsigned int array_size = nr_pages * sizeof(struct page *), i;
+	struct page **pages;
 
-	nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
-	array_size = (nr_pages * sizeof(struct page *));
+	gfp_mask |= __GFP_NOWARN;
+	if (!(gfp_mask & (GFP_DMA | GFP_DMA32)))
+		gfp_mask |= __GFP_HIGHMEM;
 
 	/* Please note that the recursion is strictly bounded. */
 	if (array_size > PAGE_SIZE) {
-		pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask,
-				node, area->caller);
+		pages = __vmalloc_node(array_size, 1, nested_gfp, node,
+					area->caller);
 	} else {
 		pages = kmalloc_node(array_size, nested_gfp, node);
 	}
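
The vmap_pfn() interface added in the hunk above is meant for PFNs that are not backed by struct page (the apply callback WARNs if pfn_valid() is true), e.g. device-local memory whose PFNs a driver already knows. A hedged usage sketch; the function name, the PFN source and the choice of write-combined protection are illustrative assumptions, not part of this commit:

#include <linux/mm.h>
#include <linux/vmalloc.h>

/*
 * Illustrative sketch only: map @count PFNs that are not struct-page
 * backed into one contiguous kernel mapping.  Where the PFNs come from
 * and how errors are reported is left to the caller.
 */
static void *example_map_device_pfns(unsigned long *pfns, unsigned int count)
{
	/* Write-combined is typical for device memory; PAGE_KERNEL also works. */
	return vmap_pfn(pfns, count, pgprot_writecombine(PAGE_KERNEL));
}

Since vmap_pfn() takes no page references, the mapping is presumably released with vunmap() once the caller is done with it.
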
@@ -2442,9 +2489,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		struct page *page;
 
 		if (node == NUMA_NO_NODE)
-			page = alloc_page(alloc_mask|highmem_mask);
+			page = alloc_page(gfp_mask);
 		else
-			page = alloc_pages_node(node, alloc_mask|highmem_mask, 0);
+			page = alloc_pages_node(node, gfp_mask, 0);
 
 		if (unlikely(!page)) {
 			/* Successfully allocated i pages, free them in __vfree() */
@@ -3032,54 +3079,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
 
-static int f(pte_t *pte, unsigned long addr, void *data)
-{
-	pte_t ***p = data;
-
-	if (p) {
-		*(*p) = pte;
-		(*p)++;
-	}
-	return 0;
-}
-
-/**
- * alloc_vm_area - allocate a range of kernel address space
- * @size: size of the area
- * @ptes: returns the PTEs for the address space
- *
- * Returns: NULL on failure, vm_struct on success
- *
- * This function reserves a range of kernel address space, and
- * allocates pagetables to map that range.  No actual mappings
- * are created.
- *
- * If @ptes is non-NULL, pointers to the PTEs (in init_mm)
- * allocated for the VM area are returned.
- */
-struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
-{
-	struct vm_struct *area;
-
-	area = get_vm_area_caller(size, VM_IOREMAP,
-				__builtin_return_address(0));
-	if (area == NULL)
-		return NULL;
-
-	/*
-	 * This ensures that page tables are constructed for this region
-	 * of kernel virtual address space and mapped into init_mm.
-	 */
-	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
-				size, f, ptes ? &ptes : NULL)) {
-		free_vm_area(area);
-		return NULL;
-	}
-
-	return area;
-}
-EXPORT_SYMBOL_GPL(alloc_vm_area);
-
 void free_vm_area(struct vm_struct *area)
 {
 	struct vm_struct *ret;
@@ -1122,10 +1122,16 @@ static inline int __zs_cpu_up(struct mapping_area *area)
 	 */
 	if (area->vm)
 		return 0;
-	area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
+	area->vm = get_vm_area(PAGE_SIZE * 2, 0);
 	if (!area->vm)
 		return -ENOMEM;
-	return 0;
+
+	/*
+	 * Populate ptes in advance to avoid pte allocation with GFP_KERNEL
+	 * in non-preemtible context of zs_map_object.
+	 */
+	return apply_to_page_range(&init_mm, (unsigned long)area->vm->addr,
+			PAGE_SIZE * 2, NULL, NULL);
 }
 
 static inline void __zs_cpu_down(struct mapping_area *area)
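
The zsmalloc hunk above is the general conversion pattern for former alloc_vm_area() users now that the helper is gone: reserve address space with get_vm_area(), then pre-populate the page tables by passing a NULL callback to apply_to_page_range() (newly allowed by this series). Restated as a hedged, standalone sketch; the function name is illustrative:

#include <linux/mm.h>
#include <linux/vmalloc.h>

/*
 * Sketch: reserve @size bytes of kernel address space with page tables
 * already populated, so later PTE updates do not need GFP_KERNEL
 * allocations in atomic context.  Mirrors the zsmalloc conversion above.
 */
static struct vm_struct *example_reserve_prefaulted_area(unsigned long size)
{
	struct vm_struct *area = get_vm_area(size, 0);

	if (!area)
		return NULL;
	/* NULL callback: only allocate the intermediate page tables. */
	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
				size, NULL, NULL)) {
		free_vm_area(area);
		return NULL;
	}
	return area;
}
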
@@ -45,7 +45,7 @@ struct hmm_buffer {
 #define TWOMEG		(1 << 21)
 #define HMM_BUFFER_SIZE (1024 << 12)
 #define HMM_PATH_MAX    64
-#define NTIMES		256
+#define NTIMES		10
 
 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
 