drm/amdgpu: add reset_ras_error_count function for GFX
GFX RAS error counters hold dirty (stale) values after a cold reboot. A read operation is needed to reset them to 0. Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Guchun Chen <guchun.chen@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Родитель
fe5211f19a
Коммит
279375c331
|
@ -206,6 +206,7 @@ struct amdgpu_gfx_funcs {
|
||||||
u32 queue, u32 vmid);
|
u32 queue, u32 vmid);
|
||||||
int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
|
int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
|
||||||
int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status);
|
int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status);
|
||||||
|
void (*reset_ras_error_count) (struct amdgpu_device *adev);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct sq_work {
|
struct sq_work {
|
||||||
|
|
|
@ -738,9 +738,9 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
|
||||||
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
|
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
|
||||||
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
|
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
|
||||||
void *ras_error_status);
|
void *ras_error_status);
|
||||||
static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev);
|
|
||||||
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
|
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
|
||||||
void *inject_if);
|
void *inject_if);
|
||||||
|
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
|
||||||
|
|
||||||
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
|
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
|
||||||
uint64_t queue_mask)
|
uint64_t queue_mask)
|
||||||
|
@ -1997,7 +1997,8 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
|
||||||
.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
|
.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
|
||||||
.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
|
.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
|
||||||
.ras_error_inject = &gfx_v9_0_ras_error_inject,
|
.ras_error_inject = &gfx_v9_0_ras_error_inject,
|
||||||
.query_ras_error_count = &gfx_v9_0_query_ras_error_count
|
.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
|
||||||
|
.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
|
static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
|
||||||
|
@ -2008,7 +2009,8 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
|
||||||
.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
|
.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
|
||||||
.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
|
.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
|
||||||
.ras_error_inject = &gfx_v9_4_ras_error_inject,
|
.ras_error_inject = &gfx_v9_4_ras_error_inject,
|
||||||
.query_ras_error_count = &gfx_v9_4_query_ras_error_count
|
.query_ras_error_count = &gfx_v9_4_query_ras_error_count,
|
||||||
|
.reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
|
static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
|
||||||
|
@ -4395,18 +4397,6 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (adev->asic_type)
|
|
||||||
{
|
|
||||||
case CHIP_VEGA20:
|
|
||||||
gfx_v9_0_clear_ras_edc_counter(adev);
|
|
||||||
break;
|
|
||||||
case CHIP_ARCTURUS:
|
|
||||||
gfx_v9_4_clear_ras_edc_counter(adev);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
amdgpu_ib_free(adev, &ib, NULL);
|
amdgpu_ib_free(adev, &ib, NULL);
|
||||||
dma_fence_put(f);
|
dma_fence_put(f);
|
||||||
|
@ -4454,6 +4444,10 @@ static int gfx_v9_0_ecc_late_init(void *handle)
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
|
if (adev->gfx.funcs &&
|
||||||
|
adev->gfx.funcs->reset_ras_error_count)
|
||||||
|
adev->gfx.funcs->reset_ras_error_count(adev);
|
||||||
|
|
||||||
r = amdgpu_gfx_ras_late_init(adev);
|
r = amdgpu_gfx_ras_late_init(adev);
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
return r;
|
||||||
|
@ -6388,7 +6382,7 @@ static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev)
|
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
int i, j, k;
|
int i, j, k;
|
||||||
|
|
||||||
|
|
|
@ -893,7 +893,7 @@ int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gfx_v9_4_clear_ras_edc_counter(struct amdgpu_device *adev)
|
void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
int i, j, k;
|
int i, j, k;
|
||||||
|
|
||||||
|
|
|
@ -32,4 +32,6 @@ int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
|
||||||
int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
|
int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
|
||||||
void *inject_if);
|
void *inject_if);
|
||||||
|
|
||||||
|
void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev);
|
||||||
|
|
||||||
#endif /* __GFX_V9_4_H__ */
|
#endif /* __GFX_V9_4_H__ */
|
||||||
|
|
Загрузка…
Ссылка в новой задаче