diff options
author | Andrey Grodzovsky <andrey.grodzovsky@amd.com> | 2017-12-12 14:09:30 -0500 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2017-12-15 17:14:50 -0500 |
commit | dcebf026e6f69fb79e7f88d10681faf4f8a985ba (patch) | |
tree | 3d31743b19a61d9e826092a8c5d2e97f9f95e1cf /drivers/gpu | |
parent | 3e98d829ad0a59425f816c94447b4ac39a72f632 (diff) | |
download | blackbird-op-linux-dcebf026e6f69fb79e7f88d10681faf4f8a985ba.tar.gz blackbird-op-linux-dcebf026e6f69fb79e7f88d10681faf4f8a985ba.zip |
drm/amdgpu: Add gpu_recovery parameter
Add new parameter to control GPU recovery procedure.
v2:
Add auto logic where reset is disabled for bare metal and enabled
for SR-IOV.
Allow forced reset from debugfs.
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 2 |
8 files changed, 19 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c31c5496dc5e..ffbe99d839a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -126,6 +126,7 @@ extern int amdgpu_param_buf_per_se; extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; +extern int amdgpu_gpu_recovery; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -1910,7 +1911,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) /* Common functions */ -int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job); +int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job, bool force); bool amdgpu_need_backup(struct amdgpu_device *adev); void amdgpu_pci_config_reset(struct amdgpu_device *adev); bool amdgpu_need_post(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 046b9d5bc14d..3f63f5ca4fa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3009,11 +3009,12 @@ error: * * @adev: amdgpu device pointer * @job: which job trigger hang + * @force forces reset regardless of amdgpu_gpu_recovery * * Attempt to reset the GPU if it has hung (all asics). * Returns 0 for success or an error on failure. */ -int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) +int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job, bool force) { struct drm_atomic_state *state = NULL; uint64_t reset_flags = 0; @@ -3024,6 +3025,12 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) return 0; } + if (!force && (amdgpu_gpu_recovery == 0 || + (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) { + DRM_INFO("GPU recovery disabled.\n"); + return 0; + } + dev_info(adev->dev, "GPU reset begin!\n"); mutex_lock(&adev->lock_reset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0b039bdcf84e..b734cd668ff1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -128,6 +128,7 @@ int amdgpu_param_buf_per_se = 0; int amdgpu_job_hang_limit = 0; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; +int amdgpu_gpu_recovery = -1; /* auto */ MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -280,6 +281,9 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444); MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); +MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto"); +module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); + #ifdef CONFIG_DRM_AMDGPU_SI #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7cb71a8e21df..d3ce12149542 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -705,7 +705,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; seq_printf(m, "gpu recover\n"); - amdgpu_gpu_recover(adev, NULL); + amdgpu_gpu_recover(adev, NULL, true); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index c340774082ea..c43643e8c8c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -88,7 +88,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) reset_work); if (!amdgpu_sriov_vf(adev)) - amdgpu_gpu_recover(adev, NULL); + amdgpu_gpu_recover(adev, NULL, false); } /* Disable *all* interrupts */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 013c0a8cfb60..be8a437fad54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -37,7 +37,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) atomic_read(&job->ring->fence_drv.last_seq), job->ring->fence_drv.sync_seq); - amdgpu_gpu_recover(job->adev, job); + amdgpu_gpu_recover(job->adev, job, false); } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 71f56900d6fe..7ade56d59c27 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -253,7 +253,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } /* Trigger recovery due to world switch failure */ - amdgpu_gpu_recover(adev, NULL); + amdgpu_gpu_recover(adev, NULL, false); } static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index df52824c0cd4..e05823d86cfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -521,7 +521,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) } /* Trigger recovery due to world switch failure */ - amdgpu_gpu_recover(adev, NULL); + amdgpu_gpu_recover(adev, NULL, false); } static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, |