From f9463a4bc8ea2df5ea25c4d6e0be72011e559b95 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 1 Nov 2017 03:56:19 +1000 Subject: [PATCH] drm/nouveau/mmu: implement new vmm frontend These are the new privileged interfaces to the VMM backends, and expose some functionality that wasn't previously available. It's now possible to allocate a chunk of address-space (even all of it), without causing page tables to be allocated up-front, and then map into it at arbitrary locations. This is the basic primitive used to support features such as sparse mapping, or to allow userspace control over its own address-space, or HMM (where the GPU driver isn't in control of the address-space layout). Rather than being tied to a subtle combination of memory object and VMA properties, arguments that control map flags (ro, kind, etc) are passed explicitly at map time. The compatibility hacks to implement the old frontend on top of the new driver backends have been replaced with something similar to implement the old frontend's interfaces on top of the new frontend. 
Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/fb.h | 4 +- .../gpu/drm/nouveau/include/nvkm/subdev/mmu.h | 48 +- drivers/gpu/drm/nouveau/nouveau_mem.h | 2 + .../drm/nouveau/nvkm/subdev/instmem/gk20a.c | 1 + .../gpu/drm/nouveau/nvkm/subdev/mmu/base.c | 152 +--- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 787 +++++++++++++++++- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 17 +- .../drm/nouveau/nvkm/subdev/secboot/priv.h | 1 + 8 files changed, 858 insertions(+), 154 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h index 5b5ff5a9a127..4af663d4d3c7 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h @@ -1,9 +1,7 @@ #ifndef __NVKM_FB_H__ #define __NVKM_FB_H__ #include -#include - -#include +#include /* memory type/access flags, do not match hardware values */ #define NV_MEM_ACCESS_RO 1 diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index 630c3cbaf2b9..7fa60d79ec4c 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -1,9 +1,6 @@ #ifndef __NVKM_MMU_H__ #define __NVKM_MMU_H__ #include -#include -#include -struct nvkm_gpuobj; struct nvkm_mem; struct nvkm_vm_pgt { @@ -12,14 +9,25 @@ struct nvkm_vm_pgt { }; struct nvkm_vma { - struct nvkm_memory *memory; - struct nvkm_tags *tags; + struct list_head head; + struct rb_node tree; + u64 addr; + u64 size:50; + bool mapref:1; /* PTs (de)referenced on (un)map (vs pre-allocated). */ + bool sparse:1; /* Unmapped PDEs/PTEs will not trigger MMU faults. */ +#define NVKM_VMA_PAGE_NONE 7 + u8 page:3; /* Requested page type (index, or NONE for automatic). */ + u8 refd:3; /* Current page type (index, or NONE for unreferenced). */ + bool used:1; /* Region allocated. */ + bool part:1; /* Region was split from an allocated region by map(). 
*/ + bool user:1; /* Region user-allocated. */ + bool busy:1; /* Region busy (for temporarily preventing user access). */ + struct nvkm_memory *memory; /* Memory currently mapped into VMA. */ + struct nvkm_tags *tags; /* Compression tag reference. */ + + struct nvkm_vma *node; struct nvkm_vm *vm; - struct nvkm_mm_node *node; - union { - u64 offset; - u64 addr; - }; + u64 offset; u32 access; }; @@ -37,8 +45,9 @@ struct nvkm_vm { struct nvkm_vmm_pt *pd; struct list_head join; - struct nvkm_mm mm; - struct kref refcount; + struct list_head list; + struct rb_root free; + struct rb_root root; bool bootstrapped; atomic_t engref[NVKM_SUBDEV_NR]; @@ -57,9 +66,16 @@ void nvkm_vm_put(struct nvkm_vma *); void nvkm_vm_map(struct nvkm_vma *, struct nvkm_mem *); void nvkm_vm_map_at(struct nvkm_vma *, u64 offset, struct nvkm_mem *); void nvkm_vm_unmap(struct nvkm_vma *); -void nvkm_vm_unmap_at(struct nvkm_vma *, u64 offset, u64 length); +int nvkm_vmm_new(struct nvkm_device *, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *, const char *name, struct nvkm_vmm **); +struct nvkm_vmm *nvkm_vmm_ref(struct nvkm_vmm *); +void nvkm_vmm_unref(struct nvkm_vmm **); int nvkm_vmm_boot(struct nvkm_vmm *); +int nvkm_vmm_join(struct nvkm_vmm *, struct nvkm_memory *inst); +void nvkm_vmm_part(struct nvkm_vmm *, struct nvkm_memory *inst); +int nvkm_vmm_get(struct nvkm_vmm *, u8 page, u64 size, struct nvkm_vma **); +void nvkm_vmm_put(struct nvkm_vmm *, struct nvkm_vma **); struct nvkm_vmm_map { struct nvkm_memory *memory; @@ -78,6 +94,12 @@ struct nvkm_vmm_map { u64 ctag; }; +int nvkm_vmm_map(struct nvkm_vmm *, struct nvkm_vma *, void *argv, u32 argc, + struct nvkm_vmm_map *); +void nvkm_vmm_unmap(struct nvkm_vmm *, struct nvkm_vma *); + +struct nvkm_vmm *nvkm_uvmm_search(struct nvkm_client *, u64 handle); + struct nvkm_mmu { const struct nvkm_mmu_func *func; struct nvkm_subdev subdev; diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.h b/drivers/gpu/drm/nouveau/nouveau_mem.h index 
20930ebc5e21..48388c538420 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.h +++ b/drivers/gpu/drm/nouveau/nouveau_mem.h @@ -1,6 +1,8 @@ #ifndef __NOUVEAU_MEM_H__ #define __NOUVEAU_MEM_H__ +#include #include +#include #include struct ttm_dma_tt; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 816d1231df5f..39f6e8e42339 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -48,6 +48,7 @@ #include #include #include +#include struct gk20a_instobj { struct nvkm_memory memory; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c index 31832398f1e9..9bf688df24f0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c @@ -266,14 +266,14 @@ nvkm_vm_map_(const struct nvkm_vmm_page *page, struct nvkm_vma *vma, u64 delta, } mutex_lock(&vmm->mutex); - nvkm_vmm_ptes_map(vmm, page, ((u64)vma->node->offset << 12) + delta, - (u64)vma->node->length << 12, map, fn); + nvkm_vmm_ptes_map(vmm, page, vma->node->addr + delta, + vma->node->size, map, fn); mutex_unlock(&vmm->mutex); - nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags); - nvkm_memory_unref(&vma->memory); - vma->memory = nvkm_memory_ref(map->memory); - vma->tags = map->tags; + nvkm_memory_tags_put(vma->node->memory, vmm->mmu->subdev.device, &vma->node->tags); + nvkm_memory_unref(&vma->node->memory); + vma->node->memory = nvkm_memory_ref(map->memory); + vma->node->tags = map->tags; } void @@ -314,11 +314,9 @@ nvkm_mmu_ptc_init(struct nvkm_mmu *mmu) void nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node) { - const struct nvkm_vmm_page *page = vma->vm->func->page; + const struct nvkm_vmm_page *page = &vma->vm->func->page[vma->node->page]; if (page->desc->func->unmap) { struct nvkm_vmm_map map = { .mem = node->mem }; - while 
(page->shift != vma->node->type) - page++; nvkm_vm_map_(page, vma, delta, node, page->desc->func->mem, &map); return; } @@ -328,11 +326,9 @@ static void nvkm_vm_map_sg_table(struct nvkm_vma *vma, u64 delta, u64 length, struct nvkm_mem *mem) { - const struct nvkm_vmm_page *page = vma->vm->func->page; + const struct nvkm_vmm_page *page = &vma->vm->func->page[vma->node->page]; if (page->desc->func->unmap) { struct nvkm_vmm_map map = { .sgl = mem->sg->sgl }; - while (page->shift != vma->node->type) - page++; nvkm_vm_map_(page, vma, delta, mem, page->desc->func->sgl, &map); return; } @@ -342,11 +338,9 @@ static void nvkm_vm_map_sg(struct nvkm_vma *vma, u64 delta, u64 length, struct nvkm_mem *mem) { - const struct nvkm_vmm_page *page = vma->vm->func->page; + const struct nvkm_vmm_page *page = &vma->vm->func->page[vma->node->page]; if (page->desc->func->unmap) { struct nvkm_vmm_map map = { .dma = mem->pages }; - while (page->shift != vma->node->type) - page++; nvkm_vm_map_(page, vma, delta, mem, page->desc->func->dma, &map); return; } @@ -364,67 +358,30 @@ nvkm_vm_map(struct nvkm_vma *vma, struct nvkm_mem *node) nvkm_vm_map_at(vma, 0, node); } -void -nvkm_vm_unmap_at(struct nvkm_vma *vma, u64 delta, u64 length) -{ - struct nvkm_vm *vm = vma->vm; - if (vm->func->page->desc->func->unmap) { - const struct nvkm_vmm_page *page = vm->func->page; - while (page->shift != vma->node->type) - page++; - mutex_lock(&vm->mutex); - nvkm_vmm_ptes_unmap(vm, page, (vma->node->offset << 12) + delta, - vma->node->length << 12, false); - mutex_unlock(&vm->mutex); - return; - } -} - void nvkm_vm_unmap(struct nvkm_vma *vma) { - nvkm_vm_unmap_at(vma, 0, (u64)vma->node->length << 12); - - nvkm_memory_tags_put(vma->memory, vma->vm->mmu->subdev.device, &vma->tags); - nvkm_memory_unref(&vma->memory); + nvkm_vmm_unmap(vma->vm, vma->node); } int nvkm_vm_get(struct nvkm_vm *vm, u64 size, u32 page_shift, u32 access, struct nvkm_vma *vma) { - u32 align = (1 << page_shift) >> 12; - u32 msize = size >> 12; 
int ret; mutex_lock(&vm->mutex); - ret = nvkm_mm_head(&vm->mm, 0, page_shift, msize, msize, align, - &vma->node); - if (unlikely(ret != 0)) { - mutex_unlock(&vm->mutex); - return ret; - } - - if (vm->func->page->desc->func->unmap) { - const struct nvkm_vmm_page *page = vm->func->page; - while (page->shift != page_shift) - page++; - - ret = nvkm_vmm_ptes_get(vm, page, vma->node->offset << 12, - vma->node->length << 12); - if (ret) { - nvkm_mm_free(&vm->mm, &vma->node); - mutex_unlock(&vm->mutex); - return ret; - } - } + ret = nvkm_vmm_get_locked(vm, true, false, false, page_shift, 0, + size, &vma->node); mutex_unlock(&vm->mutex); + if (ret) + return ret; vma->memory = NULL; vma->tags = NULL; vma->vm = NULL; nvkm_vm_ref(vm, &vma->vm, NULL); - vma->offset = (u64)vma->node->offset << 12; + vma->offset = vma->addr = vma->node->addr; vma->access = access; return 0; } @@ -432,30 +389,7 @@ nvkm_vm_get(struct nvkm_vm *vm, u64 size, u32 page_shift, u32 access, void nvkm_vm_put(struct nvkm_vma *vma) { - struct nvkm_mmu *mmu; - struct nvkm_vm *vm; - - if (unlikely(vma->node == NULL)) - return; - vm = vma->vm; - mmu = vm->mmu; - - nvkm_memory_tags_put(vma->memory, mmu->subdev.device, &vma->tags); - nvkm_memory_unref(&vma->memory); - - mutex_lock(&vm->mutex); - if (vm->func->page->desc->func->unmap) { - const struct nvkm_vmm_page *page = vm->func->page; - while (page->shift != vma->node->type) - page++; - - nvkm_vmm_ptes_put(vm, page, vma->node->offset << 12, - vma->node->length << 12); - } - - nvkm_mm_free(&vm->mm, &vma->node); - mutex_unlock(&vm->mutex); - + nvkm_vmm_put(vma->vm, &vma->node); nvkm_vm_ref(NULL, &vma->vm, NULL); } @@ -465,26 +399,6 @@ nvkm_vm_boot(struct nvkm_vm *vm, u64 size) return nvkm_vmm_boot(vm); } -static int -nvkm_vm_legacy(struct nvkm_mmu *mmu, u64 offset, u64 length, u64 mm_offset, - u32 block, struct nvkm_vm *vm) -{ - u64 mm_length = (offset + length) - mm_offset; - int ret; - - kref_init(&vm->refcount); - - if (block > length) - block = length; - - 
ret = nvkm_mm_init(&vm->mm, 0, mm_offset >> 12, mm_length >> 12, - block >> 12); - if (ret) - return ret; - - return 0; -} - int nvkm_vm_new(struct nvkm_device *device, u64 offset, u64 length, u64 mm_offset, struct lock_class_key *key, struct nvkm_vm **pvm) @@ -501,46 +415,28 @@ nvkm_vm_new(struct nvkm_device *device, u64 offset, u64 length, u64 mm_offset, return ret; } - ret = nvkm_vm_legacy(mmu, offset, length, mm_offset, - (*pvm)->func->page_block ? - (*pvm)->func->page_block : 4096, *pvm); - if (ret) - nvkm_vm_ref(NULL, pvm, NULL); - return ret; } return -EINVAL; } -static void -nvkm_vm_del(struct kref *kref) -{ - struct nvkm_vm *vm = container_of(kref, typeof(*vm), refcount); - - nvkm_mm_fini(&vm->mm); - if (vm->func) - nvkm_vmm_dtor(vm); - kfree(vm); -} - int nvkm_vm_ref(struct nvkm_vm *ref, struct nvkm_vm **ptr, struct nvkm_memory *inst) { if (ref) { - if (ref->func->join && inst) { - int ret = ref->func->join(ref, inst); + if (inst) { + int ret = nvkm_vmm_join(ref, inst); if (ret) return ret; } - kref_get(&ref->refcount); + nvkm_vmm_ref(ref); } if (*ptr) { - if ((*ptr)->func->part && inst) - (*ptr)->func->part(*ptr, inst); - kref_put(&(*ptr)->refcount, nvkm_vm_del); + nvkm_vmm_part(*ptr, inst); + nvkm_vmm_unref(ptr); } *ptr = ref; @@ -553,8 +449,8 @@ nvkm_mmu_oneinit(struct nvkm_subdev *subdev) struct nvkm_mmu *mmu = nvkm_mmu(subdev); if (mmu->func->vmm.global) { - int ret = nvkm_vm_new(subdev->device, 0, mmu->limit, 0, - NULL, &mmu->vmm); + int ret = nvkm_vmm_new(subdev->device, 0, 0, NULL, 0, NULL, + "gart", &mmu->vmm); if (ret) return ret; } @@ -576,7 +472,7 @@ nvkm_mmu_dtor(struct nvkm_subdev *subdev) { struct nvkm_mmu *mmu = nvkm_mmu(subdev); - nvkm_vm_ref(NULL, &mmu->vmm, NULL); + nvkm_vmm_unref(&mmu->vmm); nvkm_mmu_ptc_fini(mmu); return mmu; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c index 46c7fecf0054..31c4acaf44a0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c +++ 
b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c @@ -22,6 +22,8 @@ #define NVKM_VMM_LEVELS_MAX 5 #include "vmm.h" +#include + static void nvkm_vmm_pt_del(struct nvkm_vmm_pt **ppgt) { @@ -376,6 +378,25 @@ nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc, } } +static bool +nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +{ + struct nvkm_vmm_pt *pt = it->pt[0]; + if (it->desc->type == PGD) + memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes); + else + if (it->desc->type == LPT) + memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes); + return nvkm_vmm_unref_ptes(it, ptei, ptes); +} + +static bool +nvkm_vmm_sparse_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +{ + nvkm_vmm_sparse_ptes(it->desc, it->pt[0], ptei, ptes); + return nvkm_vmm_ref_ptes(it, ptei, ptes); +} + static bool nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei) { @@ -565,7 +586,111 @@ fail: return addr << page->shift; } -void +static void +nvkm_vmm_ptes_sparse_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size) +{ + nvkm_vmm_iter(vmm, page, addr, size, "sparse unref", false, + nvkm_vmm_sparse_unref_ptes, NULL, NULL, + page->desc->func->invalid ? 
+ page->desc->func->invalid : page->desc->func->unmap); +} + +static int +nvkm_vmm_ptes_sparse_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size) +{ + if ((page->type & NVKM_VMM_PAGE_SPARSE)) { + u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "sparse ref", + true, nvkm_vmm_sparse_ref_ptes, NULL, + NULL, page->desc->func->sparse); + if (fail != ~0ULL) { + if ((size = fail - addr)) + nvkm_vmm_ptes_sparse_put(vmm, page, addr, size); + return -ENOMEM; + } + return 0; + } + return -EINVAL; +} + +static int +nvkm_vmm_ptes_sparse(struct nvkm_vmm *vmm, u64 addr, u64 size, bool ref) +{ + const struct nvkm_vmm_page *page = vmm->func->page; + int m = 0, i; + u64 start = addr; + u64 block; + + while (size) { + /* Limit maximum page size based on remaining size. */ + while (size < (1ULL << page[m].shift)) + m++; + i = m; + + /* Find largest page size suitable for alignment. */ + while (!IS_ALIGNED(addr, 1ULL << page[i].shift)) + i++; + + /* Determine number of PTEs at this page size. */ + if (i != m) { + /* Limited to alignment boundary of next page size. */ + u64 next = 1ULL << page[i - 1].shift; + u64 part = ALIGN(addr, next) - addr; + if (size - part >= next) + block = (part >> page[i].shift) << page[i].shift; + else + block = (size >> page[i].shift) << page[i].shift; + } else { + block = (size >> page[i].shift) << page[i].shift;; + } + + /* Perform operation. 
*/ + if (ref) { + int ret = nvkm_vmm_ptes_sparse_get(vmm, &page[i], addr, block); + if (ret) { + if ((size = addr - start)) + nvkm_vmm_ptes_sparse(vmm, start, size, false); + return ret; + } + } else { + nvkm_vmm_ptes_sparse_put(vmm, &page[i], addr, block); + } + + size -= block; + addr += block; + } + + return 0; +} + +static void +nvkm_vmm_ptes_unmap_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size, bool sparse) +{ + const struct nvkm_vmm_desc_func *func = page->desc->func; + nvkm_vmm_iter(vmm, page, addr, size, "unmap + unref", + false, nvkm_vmm_unref_ptes, NULL, NULL, + sparse ? func->sparse : func->invalid ? func->invalid : + func->unmap); +} + +static int +nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size, struct nvkm_vmm_map *map, + nvkm_vmm_pte_func func) +{ + u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref + map", true, + nvkm_vmm_ref_ptes, func, map, NULL); + if (fail != ~0ULL) { + if ((size = fail - addr)) + nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, false); + return -ENOMEM; + } + return 0; +} + +static void nvkm_vmm_ptes_unmap(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, u64 addr, u64 size, bool sparse) { @@ -584,7 +709,7 @@ nvkm_vmm_ptes_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, NULL, func, map, NULL); } -void +static void nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, u64 addr, u64 size) { @@ -592,7 +717,7 @@ nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, nvkm_vmm_unref_ptes, NULL, NULL, NULL); } -int +static int nvkm_vmm_ptes_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, u64 addr, u64 size) { @@ -606,9 +731,120 @@ nvkm_vmm_ptes_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, return 0; } +static inline struct nvkm_vma * +nvkm_vma_new(u64 addr, u64 size) +{ + struct nvkm_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); + if (vma) { + vma->addr = addr; + 
vma->size = size; + vma->page = NVKM_VMA_PAGE_NONE; + vma->refd = NVKM_VMA_PAGE_NONE; + } + return vma; +} + +struct nvkm_vma * +nvkm_vma_tail(struct nvkm_vma *vma, u64 tail) +{ + struct nvkm_vma *new; + + BUG_ON(vma->size == tail); + + if (!(new = nvkm_vma_new(vma->addr + (vma->size - tail), tail))) + return NULL; + vma->size -= tail; + + new->mapref = vma->mapref; + new->sparse = vma->sparse; + new->page = vma->page; + new->refd = vma->refd; + new->used = vma->used; + new->part = vma->part; + new->user = vma->user; + new->busy = vma->busy; + list_add(&new->head, &vma->head); + return new; +} + +static void +nvkm_vmm_free_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + struct rb_node **ptr = &vmm->free.rb_node; + struct rb_node *parent = NULL; + + while (*ptr) { + struct nvkm_vma *this = rb_entry(*ptr, typeof(*this), tree); + parent = *ptr; + if (vma->size < this->size) + ptr = &parent->rb_left; + else + if (vma->size > this->size) + ptr = &parent->rb_right; + else + if (vma->addr < this->addr) + ptr = &parent->rb_left; + else + if (vma->addr > this->addr) + ptr = &parent->rb_right; + else + BUG(); + } + + rb_link_node(&vma->tree, parent, ptr); + rb_insert_color(&vma->tree, &vmm->free); +} + void +nvkm_vmm_node_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + struct rb_node **ptr = &vmm->root.rb_node; + struct rb_node *parent = NULL; + + while (*ptr) { + struct nvkm_vma *this = rb_entry(*ptr, typeof(*this), tree); + parent = *ptr; + if (vma->addr < this->addr) + ptr = &parent->rb_left; + else + if (vma->addr > this->addr) + ptr = &parent->rb_right; + else + BUG(); + } + + rb_link_node(&vma->tree, parent, ptr); + rb_insert_color(&vma->tree, &vmm->root); +} + +struct nvkm_vma * +nvkm_vmm_node_search(struct nvkm_vmm *vmm, u64 addr) +{ + struct rb_node *node = vmm->root.rb_node; + while (node) { + struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree); + if (addr < vma->addr) + node = node->rb_left; + else + if (addr >= vma->addr + vma->size) + node 
= node->rb_right; + else + return vma; + } + return NULL; +} + +static void nvkm_vmm_dtor(struct nvkm_vmm *vmm) { + struct nvkm_vma *vma; + struct rb_node *node; + + while ((node = rb_first(&vmm->root))) { + struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree); + nvkm_vmm_put(vmm, &vma); + } + if (vmm->bootstrapped) { const struct nvkm_vmm_page *page = vmm->func->page; const u64 limit = vmm->limit - vmm->start; @@ -620,6 +856,11 @@ nvkm_vmm_dtor(struct nvkm_vmm *vmm) nvkm_vmm_ptes_put(vmm, page, vmm->start, limit); } + vma = list_first_entry(&vmm->list, typeof(*vma), head); + list_del(&vma->head); + kfree(vma); + WARN_ON(!list_empty(&vmm->list)); + if (vmm->nullp) { dma_free_coherent(vmm->mmu->subdev.device->dev, 16 * 1024, vmm->nullp, vmm->null); @@ -639,6 +880,7 @@ nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, static struct lock_class_key _key; const struct nvkm_vmm_page *page = func->page; const struct nvkm_vmm_desc *desc; + struct nvkm_vma *vma; int levels, bits = 0; vmm->func = func; @@ -689,6 +931,16 @@ nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, return -ENOMEM; } + /* Initialise address-space MM. */ + INIT_LIST_HEAD(&vmm->list); + vmm->free = RB_ROOT; + vmm->root = RB_ROOT; + + if (!(vma = nvkm_vma_new(vmm->start, vmm->limit - vmm->start))) + return -ENOMEM; + + nvkm_vmm_free_insert(vmm, vma); + list_add(&vma->head, &vmm->list); return 0; } @@ -702,6 +954,494 @@ nvkm_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, return nvkm_vmm_ctor(func, mmu, hdr, addr, size, key, name, *pvmm); } +#define node(root, dir) ((root)->head.dir == &vmm->list) ? 
NULL : \ + list_entry((root)->head.dir, struct nvkm_vma, head) + +void +nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + struct nvkm_vma *next; + + nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags); + nvkm_memory_unref(&vma->memory); + + if (vma->part) { + struct nvkm_vma *prev = node(vma, prev); + if (!prev->memory) { + prev->size += vma->size; + rb_erase(&vma->tree, &vmm->root); + list_del(&vma->head); + kfree(vma); + vma = prev; + } + } + + next = node(vma, next); + if (next && next->part) { + if (!next->memory) { + vma->size += next->size; + rb_erase(&next->tree, &vmm->root); + list_del(&next->head); + kfree(next); + } + } +} + +void +nvkm_vmm_unmap_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + const struct nvkm_vmm_page *page = &vmm->func->page[vma->refd]; + + if (vma->mapref) { + nvkm_vmm_ptes_unmap_put(vmm, page, vma->addr, vma->size, vma->sparse); + vma->refd = NVKM_VMA_PAGE_NONE; + } else { + nvkm_vmm_ptes_unmap(vmm, page, vma->addr, vma->size, vma->sparse); + } + + nvkm_vmm_unmap_region(vmm, vma); +} + +void +nvkm_vmm_unmap(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + if (vma->memory) { + mutex_lock(&vmm->mutex); + nvkm_vmm_unmap_locked(vmm, vma); + mutex_unlock(&vmm->mutex); + } +} + +static int +nvkm_vmm_map_valid(struct nvkm_vmm *vmm, struct nvkm_vma *vma, + void *argv, u32 argc, struct nvkm_vmm_map *map) +{ + switch (nvkm_memory_target(map->memory)) { + case NVKM_MEM_TARGET_VRAM: + if (!(map->page->type & NVKM_VMM_PAGE_VRAM)) { + VMM_DEBUG(vmm, "%d !VRAM", map->page->shift); + return -EINVAL; + } + break; + case NVKM_MEM_TARGET_HOST: + case NVKM_MEM_TARGET_NCOH: + if (!(map->page->type & NVKM_VMM_PAGE_HOST)) { + VMM_DEBUG(vmm, "%d !HOST", map->page->shift); + return -EINVAL; + } + break; + default: + WARN_ON(1); + return -ENOSYS; + } + + if (!IS_ALIGNED( vma->addr, 1ULL << map->page->shift) || + !IS_ALIGNED((u64)vma->size, 1ULL << map->page->shift) || + !IS_ALIGNED( map->offset, 1ULL << 
map->page->shift) || + nvkm_memory_page(map->memory) < map->page->shift) { + VMM_DEBUG(vmm, "alignment %016llx %016llx %016llx %d %d", + vma->addr, (u64)vma->size, map->offset, map->page->shift, + nvkm_memory_page(map->memory)); + return -EINVAL; + } + + return vmm->func->valid(vmm, argv, argc, map); +} + +static int +nvkm_vmm_map_choose(struct nvkm_vmm *vmm, struct nvkm_vma *vma, + void *argv, u32 argc, struct nvkm_vmm_map *map) +{ + for (map->page = vmm->func->page; map->page->shift; map->page++) { + VMM_DEBUG(vmm, "trying %d", map->page->shift); + if (!nvkm_vmm_map_valid(vmm, vma, argv, argc, map)) + return 0; + } + return -EINVAL; +} + +static int +nvkm_vmm_map_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma, + void *argv, u32 argc, struct nvkm_vmm_map *map) +{ + nvkm_vmm_pte_func func; + int ret; + + /* Make sure we won't overrun the end of the memory object. */ + if (unlikely(nvkm_memory_size(map->memory) < map->offset + vma->size)) { + VMM_DEBUG(vmm, "overrun %016llx %016llx %016llx", + nvkm_memory_size(map->memory), + map->offset, (u64)vma->size); + return -EINVAL; + } + + /* Check remaining arguments for validity. */ + if (vma->page == NVKM_VMA_PAGE_NONE && + vma->refd == NVKM_VMA_PAGE_NONE) { + /* Find the largest page size we can perform the mapping at. */ + const u32 debug = vmm->debug; + vmm->debug = 0; + ret = nvkm_vmm_map_choose(vmm, vma, argv, argc, map); + vmm->debug = debug; + if (ret) { + VMM_DEBUG(vmm, "invalid at any page size"); + nvkm_vmm_map_choose(vmm, vma, argv, argc, map); + return -EINVAL; + } + } else { + /* Page size of the VMA is already pre-determined. */ + if (vma->refd != NVKM_VMA_PAGE_NONE) + map->page = &vmm->func->page[vma->refd]; + else + map->page = &vmm->func->page[vma->page]; + + ret = nvkm_vmm_map_valid(vmm, vma, argv, argc, map); + if (ret) { + VMM_DEBUG(vmm, "invalid %d\n", ret); + return ret; + } + } + + /* Deal with the 'offset' argument, and fetch the backend function. 
*/ + map->off = map->offset; + if (map->mem) { + for (; map->off; map->mem = map->mem->next) { + u64 size = (u64)map->mem->length << NVKM_RAM_MM_SHIFT; + if (size > map->off) + break; + map->off -= size; + } + func = map->page->desc->func->mem; + } else + if (map->sgl) { + for (; map->off; map->sgl = sg_next(map->sgl)) { + u64 size = sg_dma_len(map->sgl); + if (size > map->off) + break; + map->off -= size; + } + func = map->page->desc->func->sgl; + } else { + map->dma += map->offset >> PAGE_SHIFT; + map->off = map->offset & PAGE_MASK; + func = map->page->desc->func->dma; + } + + /* Perform the map. */ + if (vma->refd == NVKM_VMA_PAGE_NONE) { + ret = nvkm_vmm_ptes_get_map(vmm, map->page, vma->addr, vma->size, map, func); + if (ret) + return ret; + + vma->refd = map->page - vmm->func->page; + } else { + nvkm_vmm_ptes_map(vmm, map->page, vma->addr, vma->size, map, func); + } + + nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags); + nvkm_memory_unref(&vma->memory); + vma->memory = nvkm_memory_ref(map->memory); + vma->tags = map->tags; + return 0; +} + +int +nvkm_vmm_map(struct nvkm_vmm *vmm, struct nvkm_vma *vma, void *argv, u32 argc, + struct nvkm_vmm_map *map) +{ + int ret; + mutex_lock(&vmm->mutex); + ret = nvkm_vmm_map_locked(vmm, vma, argv, argc, map); + vma->busy = false; + mutex_unlock(&vmm->mutex); + return ret; +} + +static void +nvkm_vmm_put_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + struct nvkm_vma *prev, *next; + + if ((prev = node(vma, prev)) && !prev->used) { + rb_erase(&prev->tree, &vmm->free); + list_del(&prev->head); + vma->addr = prev->addr; + vma->size += prev->size; + kfree(prev); + } + + if ((next = node(vma, next)) && !next->used) { + rb_erase(&next->tree, &vmm->free); + list_del(&next->head); + vma->size += next->size; + kfree(next); + } + + nvkm_vmm_free_insert(vmm, vma); +} + +void +nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + const struct nvkm_vmm_page *page = vmm->func->page; + struct 
nvkm_vma *next = vma; + + BUG_ON(vma->part); + + if (vma->mapref || !vma->sparse) { + do { + const bool map = next->memory != NULL; + const u8 refd = next->refd; + const u64 addr = next->addr; + u64 size = next->size; + + /* Merge regions that are in the same state. */ + while ((next = node(next, next)) && next->part && + (next->memory != NULL) == map && + (next->refd == refd)) + size += next->size; + + if (map) { + /* Region(s) are mapped, merge the unmap + * and dereference into a single walk of + * the page tree. + */ + nvkm_vmm_ptes_unmap_put(vmm, &page[refd], addr, + size, vma->sparse); + } else + if (refd != NVKM_VMA_PAGE_NONE) { + /* Drop allocation-time PTE references. */ + nvkm_vmm_ptes_put(vmm, &page[refd], addr, size); + } + } while (next && next->part); + } + + /* Merge any mapped regions that were split from the initial + * address-space allocation back into the allocated VMA, and + * release memory/compression resources. + */ + next = vma; + do { + if (next->memory) + nvkm_vmm_unmap_region(vmm, next); + } while ((next = node(vma, next)) && next->part); + + if (vma->sparse && !vma->mapref) { + /* Sparse region that was allocated with a fixed page size, + * meaning all relevant PTEs were referenced once when the + * region was allocated, and remained that way, regardless + * of whether memory was mapped into it afterwards. + * + * The process of unmapping, unsparsing, and dereferencing + * PTEs can be done in a single page tree walk. + */ + nvkm_vmm_ptes_sparse_put(vmm, &page[vma->refd], vma->addr, vma->size); + } else + if (vma->sparse) { + /* Sparse region that wasn't allocated with a fixed page size, + * PTE references were taken both at allocation time (to make + * the GPU see the region as sparse), and when mapping memory + * into the region. + * + * The latter was handled above, and the remaining references + * are dealt with here. 
+ */ + nvkm_vmm_ptes_sparse(vmm, vma->addr, vma->size, false); + } + + /* Remove VMA from the list of allocated nodes. */ + rb_erase(&vma->tree, &vmm->root); + + /* Merge VMA back into the free list. */ + vma->page = NVKM_VMA_PAGE_NONE; + vma->refd = NVKM_VMA_PAGE_NONE; + vma->used = false; + vma->user = false; + nvkm_vmm_put_region(vmm, vma); +} + +void +nvkm_vmm_put(struct nvkm_vmm *vmm, struct nvkm_vma **pvma) +{ + struct nvkm_vma *vma = *pvma; + if (vma) { + mutex_lock(&vmm->mutex); + nvkm_vmm_put_locked(vmm, vma); + mutex_unlock(&vmm->mutex); + *pvma = NULL; + } +} + +int +nvkm_vmm_get_locked(struct nvkm_vmm *vmm, bool getref, bool mapref, bool sparse, + u8 shift, u8 align, u64 size, struct nvkm_vma **pvma) +{ + const struct nvkm_vmm_page *page = &vmm->func->page[NVKM_VMA_PAGE_NONE]; + struct rb_node *node = NULL, *temp; + struct nvkm_vma *vma = NULL, *tmp; + u64 addr, tail; + int ret; + + VMM_TRACE(vmm, "getref %d mapref %d sparse %d " + "shift: %d align: %d size: %016llx", + getref, mapref, sparse, shift, align, size); + + /* Zero-sized, or lazily-allocated sparse VMAs, make no sense. */ + if (unlikely(!size || (!getref && !mapref && sparse))) { + VMM_DEBUG(vmm, "args %016llx %d %d %d", + size, getref, mapref, sparse); + return -EINVAL; + } + + /* Tesla-class GPUs can only select page size per-PDE, which means + * we're required to know the mapping granularity up-front to find + * a suitable region of address-space. + * + * The same goes if we're requesting up-front allocation of PTES. + */ + if (unlikely((getref || vmm->func->page_block) && !shift)) { + VMM_DEBUG(vmm, "page size required: %d %016llx", + getref, vmm->func->page_block); + return -EINVAL; + } + + /* If a specific page size was requested, determine its index and + * make sure the requested size is a multiple of the page size. 
+ */ + if (shift) { + for (page = vmm->func->page; page->shift; page++) { + if (shift == page->shift) + break; + } + + if (!page->shift || !IS_ALIGNED(size, 1ULL << page->shift)) { + VMM_DEBUG(vmm, "page %d %016llx", shift, size); + return -EINVAL; + } + align = max_t(u8, align, shift); + } else { + align = max_t(u8, align, 12); + } + + /* Locate smallest block that can possibly satisfy the allocation. */ + temp = vmm->free.rb_node; + while (temp) { + struct nvkm_vma *this = rb_entry(temp, typeof(*this), tree); + if (this->size < size) { + temp = temp->rb_right; + } else { + node = temp; + temp = temp->rb_left; + } + } + + if (unlikely(!node)) + return -ENOSPC; + + /* Take into account alignment restrictions, trying larger blocks + * in turn until we find a suitable free block. + */ + do { + struct nvkm_vma *this = rb_entry(node, typeof(*this), tree); + struct nvkm_vma *prev = node(this, prev); + struct nvkm_vma *next = node(this, next); + const int p = page - vmm->func->page; + + addr = this->addr; + if (vmm->func->page_block && prev && prev->page != p) + addr = roundup(addr, vmm->func->page_block); + addr = ALIGN(addr, 1ULL << align); + + tail = this->addr + this->size; + if (vmm->func->page_block && next && next->page != p) + tail = rounddown(tail, vmm->func->page_block); + + if (addr <= tail && tail - addr >= size) { + rb_erase(&this->tree, &vmm->free); + vma = this; + break; + } + } while ((node = rb_next(node))); + + if (unlikely(!vma)) + return -ENOSPC; + + /* If the VMA we found isn't already exactly the requested size, + * it needs to be split, and the remaining free blocks returned. 
+ */ + if (addr != vma->addr) { + if (!(tmp = nvkm_vma_tail(vma, vma->size + vma->addr - addr))) { + nvkm_vmm_put_region(vmm, vma); + return -ENOMEM; + } + nvkm_vmm_free_insert(vmm, vma); + vma = tmp; + } + + if (size != vma->size) { + if (!(tmp = nvkm_vma_tail(vma, vma->size - size))) { + nvkm_vmm_put_region(vmm, vma); + return -ENOMEM; + } + nvkm_vmm_free_insert(vmm, tmp); + } + + /* Pre-allocate page tables and/or setup sparse mappings. */ + if (sparse && getref) + ret = nvkm_vmm_ptes_sparse_get(vmm, page, vma->addr, vma->size); + else if (sparse) + ret = nvkm_vmm_ptes_sparse(vmm, vma->addr, vma->size, true); + else if (getref) + ret = nvkm_vmm_ptes_get(vmm, page, vma->addr, vma->size); + else + ret = 0; + if (ret) { + nvkm_vmm_put_region(vmm, vma); + return ret; + } + + vma->mapref = mapref && !getref; + vma->sparse = sparse; + vma->page = page - vmm->func->page; + vma->refd = getref ? vma->page : NVKM_VMA_PAGE_NONE; + vma->used = true; + nvkm_vmm_node_insert(vmm, vma); + *pvma = vma; + return 0; +} + +int +nvkm_vmm_get(struct nvkm_vmm *vmm, u8 page, u64 size, struct nvkm_vma **pvma) +{ + int ret; + mutex_lock(&vmm->mutex); + ret = nvkm_vmm_get_locked(vmm, false, true, false, page, 0, size, pvma); + mutex_unlock(&vmm->mutex); + return ret; +} + +void +nvkm_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + if (vmm->func->part && inst) { + mutex_lock(&vmm->mutex); + vmm->func->part(vmm, inst); + mutex_unlock(&vmm->mutex); + } +} + +int +nvkm_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + int ret = 0; + if (vmm->func->join) { + mutex_lock(&vmm->mutex); + ret = vmm->func->join(vmm, inst); + mutex_unlock(&vmm->mutex); + } + return ret; +} + static bool nvkm_vmm_boot_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) { @@ -730,3 +1470,44 @@ nvkm_vmm_boot(struct nvkm_vmm *vmm) vmm->bootstrapped = true; return 0; } + +static void +nvkm_vmm_del(struct kref *kref) +{ + struct nvkm_vmm *vmm = container_of(kref, typeof(*vmm), kref); + 
nvkm_vmm_dtor(vmm); + kfree(vmm); +} + +void +nvkm_vmm_unref(struct nvkm_vmm **pvmm) +{ + struct nvkm_vmm *vmm = *pvmm; + if (vmm) { + kref_put(&vmm->kref, nvkm_vmm_del); + *pvmm = NULL; + } +} + +struct nvkm_vmm * +nvkm_vmm_ref(struct nvkm_vmm *vmm) +{ + if (vmm) + kref_get(&vmm->kref); + return vmm; +} + +int +nvkm_vmm_new(struct nvkm_device *device, u64 addr, u64 size, void *argv, + u32 argc, struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + struct nvkm_mmu *mmu = device->mmu; + struct nvkm_vmm *vmm = NULL; + int ret; + ret = mmu->func->vmm.ctor(mmu, addr, size, argv, argc, key, name, &vmm); + if (ret) + nvkm_vmm_unref(&vmm); + *pvmm = vmm; + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index bb1353e95068..5808012f7395 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -153,16 +153,19 @@ int nvkm_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, int nvkm_vmm_ctor(const struct nvkm_vmm_func *, struct nvkm_mmu *, u32 pd_header, u64 addr, u64 size, struct lock_class_key *, const char *name, struct nvkm_vmm *); -void nvkm_vmm_dtor(struct nvkm_vmm *); -void nvkm_vmm_ptes_put(struct nvkm_vmm *, const struct nvkm_vmm_page *, - u64 addr, u64 size); -int nvkm_vmm_ptes_get(struct nvkm_vmm *, const struct nvkm_vmm_page *, - u64 addr, u64 size); +struct nvkm_vma *nvkm_vmm_node_search(struct nvkm_vmm *, u64 addr); +int nvkm_vmm_get_locked(struct nvkm_vmm *, bool getref, bool mapref, + bool sparse, u8 page, u8 align, u64 size, + struct nvkm_vma **pvma); +void nvkm_vmm_put_locked(struct nvkm_vmm *, struct nvkm_vma *); +void nvkm_vmm_unmap_locked(struct nvkm_vmm *, struct nvkm_vma *); +void nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma); void nvkm_vmm_ptes_map(struct nvkm_vmm *, const struct nvkm_vmm_page *, u64 addr, u64 size, struct nvkm_vmm_map *, nvkm_vmm_pte_func); -void 
nvkm_vmm_ptes_unmap(struct nvkm_vmm *, const struct nvkm_vmm_page *, - u64 addr, u64 size, bool sparse); + +struct nvkm_vma *nvkm_vma_tail(struct nvkm_vma *, u64 tail); +void nvkm_vmm_node_insert(struct nvkm_vmm *, struct nvkm_vma *); int nv04_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, u32, u64, u64, void *, u32, struct lock_class_key *, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h index 885e919a8720..d9091f029506 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h @@ -25,6 +25,7 @@ #include #include +struct nvkm_gpuobj; struct nvkm_secboot_func { int (*oneinit)(struct nvkm_secboot *);