drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS v2
Implement AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS using TTM_PL_FLAG_CONTIGUOUS instead of a placement limit. That allows us to better handle CPU accessible placements.

v2: prevent virtual BO start address from overflowing

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Родитель
f75e237c41
Коммит
89bb5752c0
|
@ -122,20 +122,19 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
|
||||||
|
|
||||||
if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
|
if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
|
||||||
unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
|
unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
|
||||||
unsigned lpfn = 0;
|
|
||||||
|
|
||||||
/* This forces a reallocation if the flag wasn't set before */
|
|
||||||
if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
|
|
||||||
lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;
|
|
||||||
|
|
||||||
places[c].fpfn = 0;
|
places[c].fpfn = 0;
|
||||||
places[c].lpfn = lpfn;
|
places[c].lpfn = 0;
|
||||||
places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
|
places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
|
||||||
TTM_PL_FLAG_VRAM;
|
TTM_PL_FLAG_VRAM;
|
||||||
|
|
||||||
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
|
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
|
||||||
places[c].lpfn = visible_pfn;
|
places[c].lpfn = visible_pfn;
|
||||||
else
|
else
|
||||||
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
|
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
|
||||||
|
|
||||||
|
if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
|
||||||
|
places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
|
||||||
c++;
|
c++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -93,7 +93,6 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
|
||||||
const struct ttm_place *place,
|
const struct ttm_place *place,
|
||||||
struct ttm_mem_reg *mem)
|
struct ttm_mem_reg *mem)
|
||||||
{
|
{
|
||||||
struct amdgpu_bo *bo = container_of(tbo, struct amdgpu_bo, tbo);
|
|
||||||
struct amdgpu_vram_mgr *mgr = man->priv;
|
struct amdgpu_vram_mgr *mgr = man->priv;
|
||||||
struct drm_mm *mm = &mgr->mm;
|
struct drm_mm *mm = &mgr->mm;
|
||||||
struct drm_mm_node *nodes;
|
struct drm_mm_node *nodes;
|
||||||
|
@ -106,8 +105,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
|
||||||
if (!lpfn)
|
if (!lpfn)
|
||||||
lpfn = man->size;
|
lpfn = man->size;
|
||||||
|
|
||||||
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
|
if (place->flags & TTM_PL_FLAG_CONTIGUOUS ||
|
||||||
place->lpfn || amdgpu_vram_page_split == -1) {
|
amdgpu_vram_page_split == -1) {
|
||||||
pages_per_node = ~0ul;
|
pages_per_node = ~0ul;
|
||||||
num_nodes = 1;
|
num_nodes = 1;
|
||||||
} else {
|
} else {
|
||||||
|
@ -124,12 +123,14 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
|
||||||
if (place->flags & TTM_PL_FLAG_TOPDOWN)
|
if (place->flags & TTM_PL_FLAG_TOPDOWN)
|
||||||
mode = DRM_MM_INSERT_HIGH;
|
mode = DRM_MM_INSERT_HIGH;
|
||||||
|
|
||||||
|
mem->start = 0;
|
||||||
pages_left = mem->num_pages;
|
pages_left = mem->num_pages;
|
||||||
|
|
||||||
spin_lock(&mgr->lock);
|
spin_lock(&mgr->lock);
|
||||||
for (i = 0; i < num_nodes; ++i) {
|
for (i = 0; i < num_nodes; ++i) {
|
||||||
unsigned long pages = min(pages_left, pages_per_node);
|
unsigned long pages = min(pages_left, pages_per_node);
|
||||||
uint32_t alignment = mem->page_alignment;
|
uint32_t alignment = mem->page_alignment;
|
||||||
|
unsigned long start;
|
||||||
|
|
||||||
if (pages == pages_per_node)
|
if (pages == pages_per_node)
|
||||||
alignment = pages_per_node;
|
alignment = pages_per_node;
|
||||||
|
@ -141,11 +142,19 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
|
||||||
if (unlikely(r))
|
if (unlikely(r))
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
|
/* Calculate a virtual BO start address to easily check if
|
||||||
|
* everything is CPU accessible.
|
||||||
|
*/
|
||||||
|
start = nodes[i].start + nodes[i].size;
|
||||||
|
if (start > mem->num_pages)
|
||||||
|
start -= mem->num_pages;
|
||||||
|
else
|
||||||
|
start = 0;
|
||||||
|
mem->start = max(mem->start, start);
|
||||||
pages_left -= pages;
|
pages_left -= pages;
|
||||||
}
|
}
|
||||||
spin_unlock(&mgr->lock);
|
spin_unlock(&mgr->lock);
|
||||||
|
|
||||||
mem->start = num_nodes == 1 ? nodes[0].start : AMDGPU_BO_INVALID_OFFSET;
|
|
||||||
mem->mm_node = nodes;
|
mem->mm_node = nodes;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Загрузка…
Ссылка в новой задаче