drm/amdgpu: Add multi-GPU DMA mapping helpers
Add BO-type specific helper functions to DMA-map and unmap kfd_mem_attachments. Implement this functionality for userptrs by creating one SG BO per GPU and filling it with a DMA mapping of the pages from the original mem->bo. Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Acked-by: Oak Zeng <Oak.Zeng@amd.com> Acked-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Родитель
7141394edc
Коммит
264fb4d332
|
@ -44,11 +44,17 @@ enum TLB_FLUSH_TYPE {
|
|||
|
||||
struct amdgpu_device;
|
||||
|
||||
/* Kind of BO that backs a kfd_mem_attachment. */
enum kfd_mem_attachment_type {
	/* Share kgd_mem->bo or another attachment's */
	KFD_MEM_ATT_SHARED,
	/* SG bo to DMA map pages from a userptr bo */
	KFD_MEM_ATT_USERPTR,
};
|
||||
|
||||
struct kfd_mem_attachment {
|
||||
struct list_head list;
|
||||
enum kfd_mem_attachment_type type;
|
||||
bool is_mapped;
|
||||
struct amdgpu_bo_va *bo_va;
|
||||
struct amdgpu_device *adev;
|
||||
bool is_mapped;
|
||||
uint64_t va;
|
||||
uint64_t pte_flags;
|
||||
};
|
||||
|
|
|
@ -475,12 +475,120 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
|
|||
return pte_flags;
|
||||
}
|
||||
|
||||
static int
|
||||
kfd_mem_dmamap_userptr(struct kgd_mem *mem,
|
||||
struct kfd_mem_attachment *attachment)
|
||||
{
|
||||
enum dma_data_direction direction =
|
||||
mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
|
||||
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
|
||||
struct ttm_operation_ctx ctx = {.interruptible = true};
|
||||
struct amdgpu_bo *bo = attachment->bo_va->base.bo;
|
||||
struct amdgpu_device *adev = attachment->adev;
|
||||
struct ttm_tt *src_ttm = mem->bo->tbo.ttm;
|
||||
struct ttm_tt *ttm = bo->tbo.ttm;
|
||||
int ret;
|
||||
|
||||
ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
|
||||
if (unlikely(!ttm->sg))
|
||||
return -ENOMEM;
|
||||
|
||||
if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
|
||||
return -EINVAL;
|
||||
|
||||
/* Same sequence as in amdgpu_ttm_tt_pin_userptr */
|
||||
ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
|
||||
ttm->num_pages, 0,
|
||||
(u64)ttm->num_pages << PAGE_SHIFT,
|
||||
GFP_KERNEL);
|
||||
if (unlikely(ret))
|
||||
goto free_sg;
|
||||
|
||||
ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
|
||||
if (unlikely(ret))
|
||||
goto release_sg;
|
||||
|
||||
drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
|
||||
ttm->num_pages);
|
||||
|
||||
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
|
||||
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
if (ret)
|
||||
goto unmap_sg;
|
||||
|
||||
return 0;
|
||||
|
||||
unmap_sg:
|
||||
dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
|
||||
release_sg:
|
||||
pr_err("DMA map userptr failed: %d\n", ret);
|
||||
sg_free_table(ttm->sg);
|
||||
free_sg:
|
||||
kfree(ttm->sg);
|
||||
ttm->sg = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
kfd_mem_dmamap_attachment(struct kgd_mem *mem,
|
||||
struct kfd_mem_attachment *attachment)
|
||||
{
|
||||
switch (attachment->type) {
|
||||
case KFD_MEM_ATT_SHARED:
|
||||
return 0;
|
||||
case KFD_MEM_ATT_USERPTR:
|
||||
return kfd_mem_dmamap_userptr(mem, attachment);
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static void
|
||||
kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
|
||||
struct kfd_mem_attachment *attachment)
|
||||
{
|
||||
enum dma_data_direction direction =
|
||||
mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
|
||||
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
|
||||
struct ttm_operation_ctx ctx = {.interruptible = false};
|
||||
struct amdgpu_bo *bo = attachment->bo_va->base.bo;
|
||||
struct amdgpu_device *adev = attachment->adev;
|
||||
struct ttm_tt *ttm = bo->tbo.ttm;
|
||||
|
||||
if (unlikely(!ttm->sg))
|
||||
return;
|
||||
|
||||
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
|
||||
ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
|
||||
dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
|
||||
sg_free_table(ttm->sg);
|
||||
ttm->sg = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
|
||||
struct kfd_mem_attachment *attachment)
|
||||
{
|
||||
switch (attachment->type) {
|
||||
case KFD_MEM_ATT_SHARED:
|
||||
break;
|
||||
case KFD_MEM_ATT_USERPTR:
|
||||
kfd_mem_dmaunmap_userptr(mem, attachment);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
}
|
||||
|
||||
/* kfd_mem_attach - Add a BO to a VM
|
||||
*
|
||||
* Everything that needs to be done only once when a BO is first added
|
||||
* to a VM. It can later be mapped and unmapped many times without
|
||||
* repeating these steps.
|
||||
*
|
||||
* 0. Create BO for DMA mapping, if needed
|
||||
* 1. Allocate and initialize BO VA entry data structure
|
||||
* 2. Add BO to the VM
|
||||
* 3. Determine ASIC-specific PTE flags
|
||||
|
@ -490,10 +598,12 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
|
|||
static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
|
||||
struct amdgpu_vm *vm, bool is_aql)
|
||||
{
|
||||
struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
|
||||
unsigned long bo_size = mem->bo->tbo.base.size;
|
||||
uint64_t va = mem->va;
|
||||
struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
|
||||
struct amdgpu_bo *bo[2] = {NULL, NULL};
|
||||
struct drm_gem_object *gobj;
|
||||
int i, ret;
|
||||
|
||||
if (!va) {
|
||||
|
@ -511,14 +621,37 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
|
|||
pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
|
||||
va + bo_size, vm);
|
||||
|
||||
/* FIXME: For now all attachments use the same BO. This is
|
||||
* incorrect because one BO can only have one DMA mapping
|
||||
* for one GPU. We need one BO per GPU, e.g. a DMABuf
|
||||
* import with dynamic attachment. This will be addressed
|
||||
* one BO-type at a time in subsequent patches.
|
||||
*/
|
||||
bo[i] = mem->bo;
|
||||
drm_gem_object_get(&bo[i]->tbo.base);
|
||||
if (adev == bo_adev || (mem->domain == AMDGPU_GEM_DOMAIN_VRAM &&
|
||||
amdgpu_xgmi_same_hive(adev, bo_adev))) {
|
||||
/* Mappings on the local GPU and VRAM mappings in the
|
||||
* local hive share the original BO
|
||||
*/
|
||||
attachment[i]->type = KFD_MEM_ATT_SHARED;
|
||||
bo[i] = mem->bo;
|
||||
drm_gem_object_get(&bo[i]->tbo.base);
|
||||
} else if (i > 0) {
|
||||
/* Multiple mappings on the same GPU share the BO */
|
||||
attachment[i]->type = KFD_MEM_ATT_SHARED;
|
||||
bo[i] = bo[0];
|
||||
drm_gem_object_get(&bo[i]->tbo.base);
|
||||
} else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
|
||||
/* Create an SG BO to DMA-map userptrs on other GPUs */
|
||||
attachment[i]->type = KFD_MEM_ATT_USERPTR;
|
||||
ret = amdgpu_gem_object_create(adev, bo_size, 1,
|
||||
AMDGPU_GEM_DOMAIN_CPU,
|
||||
0, ttm_bo_type_sg,
|
||||
mem->bo->tbo.base.resv,
|
||||
&gobj);
|
||||
if (ret)
|
||||
goto unwind;
|
||||
bo[i] = gem_to_amdgpu_bo(gobj);
|
||||
bo[i]->parent = amdgpu_bo_ref(mem->bo);
|
||||
} else {
|
||||
/* FIXME: Need to DMA-map other BO types */
|
||||
attachment[i]->type = KFD_MEM_ATT_SHARED;
|
||||
bo[i] = mem->bo;
|
||||
drm_gem_object_get(&bo[i]->tbo.base);
|
||||
}
|
||||
|
||||
/* Add BO to VM internal data structures */
|
||||
attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
|
||||
|
|
Загрузка…
Ссылка в новой задаче