mm: make mm->pinned_vm an atomic64 counter
Taking a sleeping lock to _only_ increment a variable is quite the overkill, and pretty much all users do this. Furthermore, some drivers (i.e. infiniband and scif) that need pinned semantics can go to quite some trouble to actually delay, via workqueue, the (un)accounting for pinned pages when it is not possible to acquire the lock. By making the counter atomic we no longer need to hold the mmap_sem and can simplify some code around it for pinned_vm users. The counter is 64-bit such that we need not worry about overflows such as rdma user input controlled from userspace. Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Christoph Lameter <cl@linux.com> Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com> Reviewed-by: Jan Kara <jack@suse.cz> Signed-off-by: Davidlohr Bueso <dbueso@suse.de> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
Родитель
a2bfd708b1
Коммит
70f8a3ca68
|
@ -166,13 +166,13 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
|
||||||
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
||||||
|
|
||||||
down_write(&mm->mmap_sem);
|
down_write(&mm->mmap_sem);
|
||||||
if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) ||
|
new_pinned = atomic64_read(&mm->pinned_vm) + npages;
|
||||||
(new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) {
|
if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
|
||||||
up_write(&mm->mmap_sem);
|
up_write(&mm->mmap_sem);
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
mm->pinned_vm = new_pinned;
|
atomic64_set(&mm->pinned_vm, new_pinned);
|
||||||
up_write(&mm->mmap_sem);
|
up_write(&mm->mmap_sem);
|
||||||
|
|
||||||
cur_base = addr & PAGE_MASK;
|
cur_base = addr & PAGE_MASK;
|
||||||
|
@ -234,7 +234,7 @@ umem_release:
|
||||||
__ib_umem_release(context->device, umem, 0);
|
__ib_umem_release(context->device, umem, 0);
|
||||||
vma:
|
vma:
|
||||||
down_write(&mm->mmap_sem);
|
down_write(&mm->mmap_sem);
|
||||||
mm->pinned_vm -= ib_umem_num_pages(umem);
|
atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
|
||||||
up_write(&mm->mmap_sem);
|
up_write(&mm->mmap_sem);
|
||||||
out:
|
out:
|
||||||
if (vma_list)
|
if (vma_list)
|
||||||
|
@ -263,7 +263,7 @@ static void ib_umem_release_defer(struct work_struct *work)
|
||||||
struct ib_umem *umem = container_of(work, struct ib_umem, work);
|
struct ib_umem *umem = container_of(work, struct ib_umem, work);
|
||||||
|
|
||||||
down_write(&umem->owning_mm->mmap_sem);
|
down_write(&umem->owning_mm->mmap_sem);
|
||||||
umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
|
atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
|
||||||
up_write(&umem->owning_mm->mmap_sem);
|
up_write(&umem->owning_mm->mmap_sem);
|
||||||
|
|
||||||
__ib_umem_release_tail(umem);
|
__ib_umem_release_tail(umem);
|
||||||
|
@ -302,7 +302,7 @@ void ib_umem_release(struct ib_umem *umem)
|
||||||
} else {
|
} else {
|
||||||
down_write(&umem->owning_mm->mmap_sem);
|
down_write(&umem->owning_mm->mmap_sem);
|
||||||
}
|
}
|
||||||
umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
|
atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
|
||||||
up_write(&umem->owning_mm->mmap_sem);
|
up_write(&umem->owning_mm->mmap_sem);
|
||||||
|
|
||||||
__ib_umem_release_tail(umem);
|
__ib_umem_release_tail(umem);
|
||||||
|
|
|
@ -92,7 +92,7 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
|
||||||
size = DIV_ROUND_UP(size, PAGE_SIZE);
|
size = DIV_ROUND_UP(size, PAGE_SIZE);
|
||||||
|
|
||||||
down_read(&mm->mmap_sem);
|
down_read(&mm->mmap_sem);
|
||||||
pinned = mm->pinned_vm;
|
pinned = atomic64_read(&mm->pinned_vm);
|
||||||
up_read(&mm->mmap_sem);
|
up_read(&mm->mmap_sem);
|
||||||
|
|
||||||
/* First, check the absolute limit against all pinned pages. */
|
/* First, check the absolute limit against all pinned pages. */
|
||||||
|
@ -112,7 +112,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
down_write(&mm->mmap_sem);
|
down_write(&mm->mmap_sem);
|
||||||
mm->pinned_vm += ret;
|
atomic64_add(ret, &mm->pinned_vm);
|
||||||
up_write(&mm->mmap_sem);
|
up_write(&mm->mmap_sem);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -131,7 +131,7 @@ void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
|
||||||
|
|
||||||
if (mm) { /* during close after signal, mm can be NULL */
|
if (mm) { /* during close after signal, mm can be NULL */
|
||||||
down_write(&mm->mmap_sem);
|
down_write(&mm->mmap_sem);
|
||||||
mm->pinned_vm -= npages;
|
atomic64_sub(npages, &mm->pinned_vm);
|
||||||
up_write(&mm->mmap_sem);
|
up_write(&mm->mmap_sem);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -75,7 +75,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
|
||||||
goto bail_release;
|
goto bail_release;
|
||||||
}
|
}
|
||||||
|
|
||||||
current->mm->pinned_vm += num_pages;
|
atomic64_add(num_pages, &current->mm->pinned_vm);
|
||||||
|
|
||||||
ret = 0;
|
ret = 0;
|
||||||
goto bail;
|
goto bail;
|
||||||
|
@ -156,7 +156,7 @@ void qib_release_user_pages(struct page **p, size_t num_pages)
|
||||||
__qib_release_user_pages(p, num_pages, 1);
|
__qib_release_user_pages(p, num_pages, 1);
|
||||||
|
|
||||||
if (current->mm) {
|
if (current->mm) {
|
||||||
current->mm->pinned_vm -= num_pages;
|
atomic64_sub(num_pages, &current->mm->pinned_vm);
|
||||||
up_write(&current->mm->mmap_sem);
|
up_write(&current->mm->mmap_sem);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -129,7 +129,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
|
||||||
uiomr->owning_mm = mm = current->mm;
|
uiomr->owning_mm = mm = current->mm;
|
||||||
down_write(&mm->mmap_sem);
|
down_write(&mm->mmap_sem);
|
||||||
|
|
||||||
locked = npages + current->mm->pinned_vm;
|
locked = npages + atomic64_read(&current->mm->pinned_vm);
|
||||||
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
||||||
|
|
||||||
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
|
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
|
||||||
|
@ -187,7 +187,7 @@ out:
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
usnic_uiom_put_pages(chunk_list, 0);
|
usnic_uiom_put_pages(chunk_list, 0);
|
||||||
else {
|
else {
|
||||||
mm->pinned_vm = locked;
|
atomic64_set(&mm->pinned_vm, locked);
|
||||||
mmgrab(uiomr->owning_mm);
|
mmgrab(uiomr->owning_mm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -441,7 +441,7 @@ static void usnic_uiom_release_defer(struct work_struct *work)
|
||||||
container_of(work, struct usnic_uiom_reg, work);
|
container_of(work, struct usnic_uiom_reg, work);
|
||||||
|
|
||||||
down_write(&uiomr->owning_mm->mmap_sem);
|
down_write(&uiomr->owning_mm->mmap_sem);
|
||||||
uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
|
atomic64_sub(usnic_uiom_num_pages(uiomr), &uiomr->owning_mm->pinned_vm);
|
||||||
up_write(&uiomr->owning_mm->mmap_sem);
|
up_write(&uiomr->owning_mm->mmap_sem);
|
||||||
|
|
||||||
__usnic_uiom_release_tail(uiomr);
|
__usnic_uiom_release_tail(uiomr);
|
||||||
|
@ -469,7 +469,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
|
||||||
} else {
|
} else {
|
||||||
down_write(&uiomr->owning_mm->mmap_sem);
|
down_write(&uiomr->owning_mm->mmap_sem);
|
||||||
}
|
}
|
||||||
uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
|
atomic64_sub(usnic_uiom_num_pages(uiomr), &uiomr->owning_mm->pinned_vm);
|
||||||
up_write(&uiomr->owning_mm->mmap_sem);
|
up_write(&uiomr->owning_mm->mmap_sem);
|
||||||
|
|
||||||
__usnic_uiom_release_tail(uiomr);
|
__usnic_uiom_release_tail(uiomr);
|
||||||
|
|
|
@ -285,7 +285,7 @@ __scif_dec_pinned_vm_lock(struct mm_struct *mm,
|
||||||
} else {
|
} else {
|
||||||
down_write(&mm->mmap_sem);
|
down_write(&mm->mmap_sem);
|
||||||
}
|
}
|
||||||
mm->pinned_vm -= nr_pages;
|
atomic64_sub(nr_pages, &mm->pinned_vm);
|
||||||
up_write(&mm->mmap_sem);
|
up_write(&mm->mmap_sem);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -299,7 +299,7 @@ static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
locked = nr_pages;
|
locked = nr_pages;
|
||||||
locked += mm->pinned_vm;
|
locked += atomic64_read(&mm->pinned_vm);
|
||||||
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
||||||
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
|
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
|
||||||
dev_err(scif_info.mdev.this_device,
|
dev_err(scif_info.mdev.this_device,
|
||||||
|
@ -307,7 +307,7 @@ static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
|
||||||
locked, lock_limit);
|
locked, lock_limit);
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
mm->pinned_vm = locked;
|
atomic64_set(&mm->pinned_vm, locked);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
|
||||||
SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
|
SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
|
||||||
SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
|
SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
|
||||||
SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
|
SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
|
||||||
SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
|
SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm));
|
||||||
SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
|
SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
|
||||||
SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
|
SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
|
||||||
SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
|
SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
|
||||||
|
|
|
@ -405,7 +405,7 @@ struct mm_struct {
|
||||||
|
|
||||||
unsigned long total_vm; /* Total pages mapped */
|
unsigned long total_vm; /* Total pages mapped */
|
||||||
unsigned long locked_vm; /* Pages that have PG_mlocked set */
|
unsigned long locked_vm; /* Pages that have PG_mlocked set */
|
||||||
unsigned long pinned_vm; /* Refcount permanently increased */
|
atomic64_t pinned_vm; /* Refcount permanently increased */
|
||||||
unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
|
unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
|
||||||
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
|
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
|
||||||
unsigned long stack_vm; /* VM_STACK */
|
unsigned long stack_vm; /* VM_STACK */
|
||||||
|
|
|
@ -5459,7 +5459,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
|
||||||
|
|
||||||
/* now it's safe to free the pages */
|
/* now it's safe to free the pages */
|
||||||
atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
|
atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
|
||||||
vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
|
atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
|
||||||
|
|
||||||
/* this has to be the last one */
|
/* this has to be the last one */
|
||||||
rb_free_aux(rb);
|
rb_free_aux(rb);
|
||||||
|
@ -5532,7 +5532,7 @@ again:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
|
atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
|
||||||
vma->vm_mm->pinned_vm -= mmap_locked;
|
atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
|
||||||
free_uid(mmap_user);
|
free_uid(mmap_user);
|
||||||
|
|
||||||
out_put:
|
out_put:
|
||||||
|
@ -5680,7 +5680,7 @@ accounting:
|
||||||
|
|
||||||
lock_limit = rlimit(RLIMIT_MEMLOCK);
|
lock_limit = rlimit(RLIMIT_MEMLOCK);
|
||||||
lock_limit >>= PAGE_SHIFT;
|
lock_limit >>= PAGE_SHIFT;
|
||||||
locked = vma->vm_mm->pinned_vm + extra;
|
locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
|
||||||
|
|
||||||
if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
|
if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
|
||||||
!capable(CAP_IPC_LOCK)) {
|
!capable(CAP_IPC_LOCK)) {
|
||||||
|
@ -5721,7 +5721,7 @@ accounting:
|
||||||
unlock:
|
unlock:
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
atomic_long_add(user_extra, &user->locked_vm);
|
atomic_long_add(user_extra, &user->locked_vm);
|
||||||
vma->vm_mm->pinned_vm += extra;
|
atomic64_add(extra, &vma->vm_mm->pinned_vm);
|
||||||
|
|
||||||
atomic_inc(&event->mmap_count);
|
atomic_inc(&event->mmap_count);
|
||||||
} else if (rb) {
|
} else if (rb) {
|
||||||
|
|
|
@ -981,7 +981,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
|
||||||
mm_pgtables_bytes_init(mm);
|
mm_pgtables_bytes_init(mm);
|
||||||
mm->map_count = 0;
|
mm->map_count = 0;
|
||||||
mm->locked_vm = 0;
|
mm->locked_vm = 0;
|
||||||
mm->pinned_vm = 0;
|
atomic64_set(&mm->pinned_vm, 0);
|
||||||
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
|
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
|
||||||
spin_lock_init(&mm->page_table_lock);
|
spin_lock_init(&mm->page_table_lock);
|
||||||
spin_lock_init(&mm->arg_lock);
|
spin_lock_init(&mm->arg_lock);
|
||||||
|
|
|
@ -135,7 +135,7 @@ void dump_mm(const struct mm_struct *mm)
|
||||||
"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
|
"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
|
||||||
"pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
|
"pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
|
||||||
"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
|
"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
|
||||||
"pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
|
"pinned_vm %llx data_vm %lx exec_vm %lx stack_vm %lx\n"
|
||||||
"start_code %lx end_code %lx start_data %lx end_data %lx\n"
|
"start_code %lx end_code %lx start_data %lx end_data %lx\n"
|
||||||
"start_brk %lx brk %lx start_stack %lx\n"
|
"start_brk %lx brk %lx start_stack %lx\n"
|
||||||
"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
|
"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
|
||||||
|
@ -166,7 +166,8 @@ void dump_mm(const struct mm_struct *mm)
|
||||||
mm_pgtables_bytes(mm),
|
mm_pgtables_bytes(mm),
|
||||||
mm->map_count,
|
mm->map_count,
|
||||||
mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
|
mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
|
||||||
mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm,
|
atomic64_read(&mm->pinned_vm),
|
||||||
|
mm->data_vm, mm->exec_vm, mm->stack_vm,
|
||||||
mm->start_code, mm->end_code, mm->start_data, mm->end_data,
|
mm->start_code, mm->end_code, mm->start_data, mm->end_data,
|
||||||
mm->start_brk, mm->brk, mm->start_stack,
|
mm->start_brk, mm->brk, mm->start_stack,
|
||||||
mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,
|
mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,
|
||||||
|
|
Загрузка…
Ссылка в новой задаче