drm/amdkfd: Fix double release compute pasid
If kfd_process_device_init_vm returns failure after the vm has been converted to a compute vm and vm->pasid has been set to the compute pasid, KFD will not take the pdd->drm_file reference. As a result, the drm close file handler may be called to release the compute pasid before the KFD process destroy worker releases the same pasid and sets vm->pasid to zero. This generates the WARNING backtrace and NULL pointer access shown below.

Add the helper amdgpu_amdkfd_gpuvm_set_vm_pasid and call it as the last step of kfd_process_device_init_vm. This ensures that the vm pasid is either the original pasid (if acquiring the vm failed) or the compute pasid with the pdd->drm_file reference taken, so the same pasid is never released twice.

amdgpu: Failed to create process VM object
ida_free called for id=32770 which is not allocated.
WARNING: CPU: 57 PID: 72542 at ../lib/idr.c:522 ida_free+0x96/0x140
RIP: 0010:ida_free+0x96/0x140
Call Trace:
 amdgpu_pasid_free_delayed+0xe1/0x2a0 [amdgpu]
 amdgpu_driver_postclose_kms+0x2d8/0x340 [amdgpu]
 drm_file_free.part.13+0x216/0x270 [drm]
 drm_close_helper.isra.14+0x60/0x70 [drm]
 drm_release+0x6e/0xf0 [drm]
 __fput+0xcc/0x280
 ____fput+0xe/0x20
 task_work_run+0x96/0xc0
 do_exit+0x3d0/0xc10

BUG: kernel NULL pointer dereference, address: 0000000000000000
RIP: 0010:ida_free+0x76/0x140
Call Trace:
 amdgpu_pasid_free_delayed+0xe1/0x2a0 [amdgpu]
 amdgpu_driver_postclose_kms+0x2d8/0x340 [amdgpu]
 drm_file_free.part.13+0x216/0x270 [drm]
 drm_close_helper.isra.14+0x60/0x70 [drm]
 drm_release+0x6e/0xf0 [drm]
 __fput+0xcc/0x280
 ____fput+0xe/0x20
 task_work_run+0x96/0xc0
 do_exit+0x3d0/0xc10

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Родитель
29d48b87db
Коммит
1a799c4c19
|
@ -270,8 +270,10 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_
|
|||
(&((struct amdgpu_fpriv *) \
|
||||
((struct drm_file *)(drm_priv))->driver_priv)->vm)
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
|
||||
struct file *filp, u32 pasid);
|
||||
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
|
||||
struct file *filp, u32 pasid,
|
||||
struct file *filp,
|
||||
void **process_info,
|
||||
struct dma_fence **ef);
|
||||
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
|
||||
|
|
|
@ -1429,8 +1429,36 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
|
|||
amdgpu_bo_unreserve(bo);
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
|
||||
struct file *filp, u32 pasid)
|
||||
|
||||
{
|
||||
struct amdgpu_fpriv *drv_priv;
|
||||
struct amdgpu_vm *avm;
|
||||
int ret;
|
||||
|
||||
ret = amdgpu_file_to_fpriv(filp, &drv_priv);
|
||||
if (ret)
|
||||
return ret;
|
||||
avm = &drv_priv->vm;
|
||||
|
||||
/* Free the original amdgpu allocated pasid,
|
||||
* will be replaced with kfd allocated pasid.
|
||||
*/
|
||||
if (avm->pasid) {
|
||||
amdgpu_pasid_free(avm->pasid);
|
||||
amdgpu_vm_set_pasid(adev, avm, 0);
|
||||
}
|
||||
|
||||
ret = amdgpu_vm_set_pasid(adev, avm, pasid);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
|
||||
struct file *filp, u32 pasid,
|
||||
struct file *filp,
|
||||
void **process_info,
|
||||
struct dma_fence **ef)
|
||||
{
|
||||
|
@ -1447,22 +1475,11 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
|
|||
if (avm->process_info)
|
||||
return -EINVAL;
|
||||
|
||||
/* Free the original amdgpu allocated pasid,
|
||||
* will be replaced with kfd allocated pasid.
|
||||
*/
|
||||
if (avm->pasid) {
|
||||
amdgpu_pasid_free(avm->pasid);
|
||||
amdgpu_vm_set_pasid(adev, avm, 0);
|
||||
}
|
||||
|
||||
/* Convert VM into a compute VM */
|
||||
ret = amdgpu_vm_make_compute(adev, avm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = amdgpu_vm_set_pasid(adev, avm, pasid);
|
||||
if (ret)
|
||||
return ret;
|
||||
/* Initialize KFD part of the VM and process info */
|
||||
ret = init_kfd_vm(avm, process_info, ef);
|
||||
if (ret)
|
||||
|
|
|
@ -1576,9 +1576,9 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
|
|||
p = pdd->process;
|
||||
dev = pdd->dev;
|
||||
|
||||
ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
|
||||
dev->adev, drm_file, p->pasid,
|
||||
&p->kgd_process_info, &p->ef);
|
||||
ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, drm_file,
|
||||
&p->kgd_process_info,
|
||||
&p->ef);
|
||||
if (ret) {
|
||||
pr_err("Failed to create process VM object\n");
|
||||
return ret;
|
||||
|
@ -1593,10 +1593,16 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
|
|||
if (ret)
|
||||
goto err_init_cwsr;
|
||||
|
||||
ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, drm_file, p->pasid);
|
||||
if (ret)
|
||||
goto err_set_pasid;
|
||||
|
||||
pdd->drm_file = drm_file;
|
||||
|
||||
return 0;
|
||||
|
||||
err_set_pasid:
|
||||
kfd_process_device_destroy_cwsr_dgpu(pdd);
|
||||
err_init_cwsr:
|
||||
kfd_process_device_destroy_ib_mem(pdd);
|
||||
err_reserve_ib_mem:
|
||||
|
|
Загрузка…
Ссылка в новой задаче