From f2a79be1c094f4d664b6a3fbfd9b5a61f8ff7f02 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Mon, 25 Nov 2019 12:26:09 +0800 Subject: drm/amdgpu: export amdgpu_ras_find_obj to use externally Change it to external interface. Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index f80fd3428c98..a2c1ac1b9572 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -611,6 +611,9 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, struct ras_dispatch_if *info); +struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, + struct ras_common_if *head); + extern atomic_t amdgpu_ras_in_intr; static inline bool amdgpu_ras_intr_triggered(void) -- cgit v1.2.3 From 00eaa57172a02edddbf445112409e807e0caacd9 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Fri, 25 Oct 2019 17:19:38 +0800 Subject: drm/amdgpu: clear err_event_athub flag after reset exit Otherwise next err_event_athub error cannot call gpu reset. And following resume sequence will not be affected by this flag. v2: create function to clear amdgpu_ras_in_intr for modularity of ras driver Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2957ebf9b97c..5a8506182ade 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3887,6 +3887,9 @@ static int amdgpu_do_asic_reset(struct amdgpu_device *adev, } } + if (!r && amdgpu_ras_intr_triggered()) + amdgpu_ras_intr_cleared(); + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { if (need_full_reset) { /* post card */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index a2c1ac1b9572..d4ade4739245 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -621,6 +621,11 @@ static inline bool amdgpu_ras_intr_triggered(void) return !!atomic_read(&amdgpu_ras_in_intr); } +static inline void amdgpu_ras_intr_cleared(void) +{ + atomic_set(&amdgpu_ras_in_intr, 0); +} + void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); #endif -- cgit v1.2.3 From 619346240932ac86a0f0c42f887827ae759eda47 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Fri, 13 Dec 2019 16:46:05 +0800 Subject: drm/amdgpu: drop useless BACO arg in amdgpu_ras_reset_gpu BACO reset mode strategy is determined by latter func when calling amdgpu_ras_reset_gpu. So not to confuse audience, drop it. Signed-off-by: Guchun Chen Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 2 +- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 2 +- 6 files changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e00b46180d2e..db7b2b3f9966 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -641,7 +641,7 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); if (adev->gfx.funcs->query_ras_error_count) adev->gfx.funcs->query_ras_error_count(adev, err_data); - amdgpu_ras_reset_gpu(adev, 0); + amdgpu_ras_reset_gpu(adev); } return AMDGPU_RAS_SUCCESS; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 04394c45aa03..d5346dc2523a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1870,7 +1870,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev) * See feature_enable_on_boot */ amdgpu_ras_disable_all_features(adev, 1); - amdgpu_ras_reset_gpu(adev, 0); + amdgpu_ras_reset_gpu(adev); } } @@ -1933,6 +1933,6 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n"); - amdgpu_ras_reset_gpu(adev, false); + amdgpu_ras_reset_gpu(adev); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index d4ade4739245..a5fe29a9373e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -494,8 +494,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); -static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev, - bool is_baco) +static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 6010999d9020..a2ee30b16212 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -160,7 +160,7 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - amdgpu_ras_reset_gpu(adev, 0); + amdgpu_ras_reset_gpu(adev); return AMDGPU_RAS_SUCCESS; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index d4fb9cf27e21..8a6c733d170c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -132,7 +132,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, err_data->err_addr_cnt)) DRM_WARN("Failed to add ras bad page!\n"); - amdgpu_ras_reset_gpu(adev, 0); + amdgpu_ras_reset_gpu(adev); } kfree(err_data->err_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index bb701dbfd472..7091782266b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -339,7 +339,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device /* ras_controller_int is dedicated for nbif ras error, * not the global interrupt for sync flood */ - amdgpu_ras_reset_gpu(adev, true); + amdgpu_ras_reset_gpu(adev); } } -- cgit v1.2.3