From 3f2a06ac814e84fba16592286ec99364098cdd07 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 15 Nov 2019 10:21:23 -0500 Subject: drm/amdgpu: disable gfxoff on original raven MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are still combinations of sbios and firmware that are not stable. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=204689 Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3ebd5c20dfd3..3e4ac2f06c3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1051,8 +1051,13 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) case CHIP_VEGA20: break; case CHIP_RAVEN: - if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) - &&((adev->gfx.rlc_fw_version != 106 && + /* Disable GFXOFF on original raven. There are combinations + * of sbios and platforms that are not stable. + */ + if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + &&((adev->gfx.rlc_fw_version != 106 && adev->gfx.rlc_fw_version < 531) || (adev->gfx.rlc_fw_version == 53815) || (adev->gfx.rlc_feature_version < 1) || -- cgit v1.2.3 From 46f719696ee62a7637116791bb4f571d030569cd Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Tue, 19 Nov 2019 16:25:25 +0800 Subject: drm/amdgpu: define soc15_ras_field_entry for reuse The struct soc15_ras_field_entry will be reused by other IPs, such as mmhub and gc v2: rename ras_subblock_regs to gc_ras_fields_vg20, because the future asic maybe have a different table. Signed-off-by: Dennis Li Reviewed-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 34 +++++++++++----------------------- drivers/gpu/drm/amd/amdgpu/soc15.h | 12 ++++++++++++ 2 files changed, 23 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3e4ac2f06c3b..308d5ccbf4e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -131,18 +131,6 @@ MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 -struct ras_gfx_subblock_reg { - const char *name; - uint32_t hwip; - uint32_t inst; - uint32_t seg; - uint32_t reg_offset; - uint32_t sec_count_mask; - uint32_t sec_count_shift; - uint32_t ded_count_mask; - uint32_t ded_count_shift; -}; - enum ta_ras_gfx_subblock { /*CPC*/ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, @@ -5484,7 +5472,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, } -static const struct ras_gfx_subblock_reg ras_subblock_regs[] = { +static const struct soc15_ras_field_entry gc_ras_fields_vg20[] = { { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) @@ -6143,29 +6131,29 @@ static int __get_ras_error_count(const struct soc15_reg_entry *reg, uint32_t i; uint32_t sec_cnt, ded_cnt; - for (i = 0; i < ARRAY_SIZE(ras_subblock_regs); i++) { - if(ras_subblock_regs[i].reg_offset != reg->reg_offset || - ras_subblock_regs[i].seg != reg->seg || - ras_subblock_regs[i].inst != reg->inst) + for (i = 0; i < ARRAY_SIZE(gc_ras_fields_vg20); i++) { + if(gc_ras_fields_vg20[i].reg_offset != reg->reg_offset || + gc_ras_fields_vg20[i].seg != reg->seg || + gc_ras_fields_vg20[i].inst != reg->inst) continue; sec_cnt = (value & - ras_subblock_regs[i].sec_count_mask) >> - ras_subblock_regs[i].sec_count_shift; + gc_ras_fields_vg20[i].sec_count_mask) >> + gc_ras_fields_vg20[i].sec_count_shift; if (sec_cnt) { DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", - ras_subblock_regs[i].name, + gc_ras_fields_vg20[i].name, se_id, inst_id, sec_cnt); *sec_count += sec_cnt; } ded_cnt = (value & - ras_subblock_regs[i].ded_count_mask) >> - ras_subblock_regs[i].ded_count_shift; + gc_ras_fields_vg20[i].ded_count_mask) >> + gc_ras_fields_vg20[i].ded_count_shift; if (ded_cnt) { DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n", - ras_subblock_regs[i].name, + gc_ras_fields_vg20[i].name, se_id, inst_id, ded_cnt); *ded_count += ded_cnt; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 9af6c6ffbfa2..344280b869c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -60,6 +60,18 @@ struct soc15_allowed_register_entry { bool grbm_indexed; }; +struct soc15_ras_field_entry { + const char *name; + uint32_t hwip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + uint32_t sec_count_mask; + uint32_t sec_count_shift; + uint32_t ded_count_mask; + uint32_t ded_count_shift; +}; + #define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg #define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) -- cgit v1.2.3 From 6e04b2248dfd9b29d4a7b8cb55b491ec2a380298 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Wed, 20 Nov 2019 16:32:46 +0000 Subject: drm/amdgpu: Update Arcturus golden registers Signed-off-by: Jay Cornwall Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 308d5ccbf4e3..f6e39b332762 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -692,6 +692,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00), }; static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = -- cgit v1.2.3 From be3e73ea7def13abbb481844aaaa073903e18b03 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 20 Nov 2019 19:21:35 +0800 Subject: drm/amdgpu: apply gpr/gds workaround before enabling GFX EDC mode gfx memory should be initialized before enabling DED and FUE field in mmGB_EDC_MODE Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f6e39b332762..cc8dab5f5b55 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4235,10 +4235,6 @@ static int gfx_v9_0_ecc_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = amdgpu_gfx_ras_late_init(adev); - if (r) - return r; - r = gfx_v9_0_do_edc_gds_workarounds(adev); if (r) return r; @@ -4248,6 +4244,10 @@ static int gfx_v9_0_ecc_late_init(void *handle) if (r) return r; + r = amdgpu_gfx_ras_late_init(adev); + if (r) + return r; + return 0; } -- cgit v1.2.3 From 2255d7f36e479646244cd87906a6b87eae73711d Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 26 Nov 2019 14:23:10 -0500 Subject: drm/amdgpu/gfx: Clear more EDC cnt Clear SDMA and HDP EDC counter in GPR workarounds. Signed-off-by: James Zhu Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index cc8dab5f5b55..c1bffd906fb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -48,6 +48,8 @@ #include "amdgpu_ras.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "sdma1/sdma1_4_0_offset.h" #define GFX9_NUM_GFX_RINGS 1 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -4029,6 +4031,9 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, + { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), 0, 1, 1}, + { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_EDC_COUNTER), 0, 1, 1}, + { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, }; static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) -- cgit v1.2.3 From 45317d5ffb4d9bc842ef1f6119bc0b9e36c95f56 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 26 Nov 2019 14:27:46 -0500 Subject: drm/amdgpu/gfx: Increase dispatch packet number For Arcturus, increase dispatch packet number to stress scheduler. Signed-off-by: James Zhu Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c1bffd906fb4..047453c16a6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4146,7 +4146,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = 256; /* x */ ib.ptr[ib.length_dw++] = 1; /* y */ ib.ptr[ib.length_dw++] = 1; /* z */ ib.ptr[ib.length_dw++] = @@ -4174,7 +4174,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = 256; /* x */ ib.ptr[ib.length_dw++] = 1; /* y */ ib.ptr[ib.length_dw++] = 1; /* z */ ib.ptr[ib.length_dw++] = -- cgit v1.2.3 From 82a829dc8c2bb03cc9b7e5beb1c5479aa3ba7831 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Tue, 26 Nov 2019 19:42:25 +0800 Subject: drm/amdgpu: fix calltrace during kmd unload(v3) issue: kernel would report a warning from a double unpin during the driver unloading on the CSB bo why: we unpin it during hw_fini, and there will be another unpin in sw_fini on CSB bo. fix: actually we don't need to pin/unpin it during hw_init/fini since it is created with kernel pinned, we only need to fullfill the CSB again during hw_init to prevent CSB/VRAM lost after S3 v2: get_csb in init_rlc so hw_init() will make CSIB content back even after reset or s3 v3: use bo_create_kernel instead of bo_create_reserved for CSB otherwise the bo_free_kernel() on CSB is not aligned and would lead to its internal reserve pending there forever take care of gfx7/8 as well Signed-off-by: Monk Liu Reviewed-by: Hawking Zhang Reviewed-by: Xiaojie Yuan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c | 10 +----- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 58 +-------------------------------- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 40 +---------------------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 40 +---------------------- 5 files changed, 6 insertions(+), 144 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index c8793e6cc3c5..6373bfb47d55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -124,13 +124,12 @@ int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws) */ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev) { - volatile u32 *dst_ptr; u32 dws; int r; /* allocate clear state block */ adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev); - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->gfx.rlc.clear_state_obj, &adev->gfx.rlc.clear_state_gpu_addr, @@ -141,13 +140,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev) return r; } - /* set up the cs buffer */ - dst_ptr = adev->gfx.rlc.cs_ptr; - adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 82ebc17c9037..e200ba5659d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -990,39 +990,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v10_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -1784,25 +1751,7 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static int gfx_v10_0_init_csb(struct amdgpu_device *adev) { - int r; - - if (adev->in_gpu_reset) { - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (r) - return r; - - r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, - (void **)&adev->gfx.rlc.cs_ptr); - if (!r) { - adev->gfx.rlc.funcs->get_csb_buffer(adev, - adev->gfx.rlc.cs_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - } - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - if (r) - return r; - } + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, @@ -3773,10 +3722,6 @@ static int gfx_v10_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = gfx_v10_0_csb_vram_pin(adev); - if (r) - return r; - if (!amdgpu_emu_mode) gfx_v10_0_init_golden_registers(adev); @@ -3864,7 +3809,6 @@ static int gfx_v10_0_hw_fini(void *handle) } gfx_v10_0_cp_enable(adev, false); gfx_v10_0_enable_gui_idle_interrupt(adev, false); - gfx_v10_0_csb_vram_unpin(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 58b7ef97bff5..8f20a5dd44fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4553,6 +4553,8 @@ static int gfx_v7_0_hw_init(void *handle) gfx_v7_0_constants_init(adev); + /* init CSB */ + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* init rlc */ r = adev->gfx.rlc.funcs->resume(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 14e774d52727..9b0b22522e83 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1321,39 +1321,6 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -3916,6 +3883,7 @@ static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v8_0_init_csb(struct amdgpu_device *adev) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ WREG32(mmRLC_CSIB_ADDR_HI, adev->gfx.rlc.clear_state_gpu_addr >> 32); @@ -4836,10 +4804,6 @@ static int gfx_v8_0_hw_init(void *handle) gfx_v8_0_init_golden_registers(adev); gfx_v8_0_constants_init(adev); - r = gfx_v8_0_csb_vram_pin(adev); - if (r) - return r; - r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -4957,8 +4921,6 @@ static int gfx_v8_0_hw_fini(void *handle) pr_err("rlc is busy, skip halt rlc\n"); amdgpu_gfx_rlc_exit_safe_mode(adev); - gfx_v8_0_csb_vram_unpin(adev); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 047453c16a6b..1743fe281354 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1685,39 +1685,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -2405,6 +2372,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_0_init_csb(struct amdgpu_device *adev) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), adev->gfx.rlc.clear_state_gpu_addr >> 32); @@ -3696,10 +3664,6 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_constants_init(adev); - r = gfx_v9_0_csb_vram_pin(adev); - if (r) - return r; - r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -3781,8 +3745,6 @@ static int gfx_v9_0_hw_fini(void *handle) gfx_v9_0_cp_enable(adev, false); adev->gfx.rlc.funcs->stop(adev); - gfx_v9_0_csb_vram_unpin(adev); - return 0; } -- cgit v1.2.3 From f83f5a1e115c8dc382a5abaaf0c10374fbcf1038 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 3 Dec 2019 15:40:10 -0500 Subject: drm/amdgpu/gfx: Improvement on EDC GPR workarounds SPI limits total CS waves in flight per SE to no more than 32 * num_cu and we need to stuff 40 waves on a CU to completely clean the SGPR. This is accomplished in the WR by cleaning the SE in two steps, half of the CU per step. Signed-off-by: James Zhu Reviewed-by: Yong Zhao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 83 ++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1743fe281354..2616f1b59bbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3938,24 +3938,37 @@ static const struct soc15_reg_entry vgpr_init_regs[] = { { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ }; -static const struct soc15_reg_entry sgpr_init_regs[] = { - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, +static const struct soc15_reg_entry sgpr1_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, +}; + +static const struct soc15_reg_entry sgpr2_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, }; @@ -4065,7 +4078,9 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) total_size = ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; total_size += - ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + ((ARRAY_SIZE(sgpr1_init_regs) * 3) + 4 + 5 + 2) * 4; + total_size += + ((ARRAY_SIZE(sgpr2_init_regs) * 3) + 4 + 5 + 2) * 4; total_size = ALIGN(total_size, 256); vgpr_offset = total_size; total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); @@ -4108,7 +4123,35 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 256; /* x */ + ib.ptr[ib.length_dw++] = 0x40*2; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* SGPR1 */ + /* write the register state for the compute dispatch */ + for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = 0xA0*2; /* x */ ib.ptr[ib.length_dw++] = 1; /* y */ ib.ptr[ib.length_dw++] = 1; /* z */ ib.ptr[ib.length_dw++] = @@ -4118,13 +4161,13 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); - /* SGPR */ + /* SGPR2 */ /* write the register state for the compute dispatch */ - for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { + for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i++) { ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); - ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) - PACKET3_SET_SH_REG_START; - ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; + ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; } /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; @@ -4136,7 +4179,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 256; /* x */ + ib.ptr[ib.length_dw++] = 0xA0*2; /* x */ ib.ptr[ib.length_dw++] = 1; /* y */ ib.ptr[ib.length_dw++] = 1; /* z */ ib.ptr[ib.length_dw++] = -- cgit v1.2.3 From aaff8b448d2ab8c0ccc8591c997663c54b074293 Mon Sep 17 00:00:00 2001 From: changzhu Date: Thu, 12 Dec 2019 13:46:06 +0800 Subject: drm/amdgpu: enable gfxoff for raven1 refresh When smu version is larger than 0x41e2b, it will load raven_kicker_rlc.bin.To enable gfxoff for raven_kicker_rlc.bin,it needs to avoid adev->pm.pp_feature &= ~PP_GFXOFF_MASK when it loads raven_kicker_rlc.bin. Signed-off-by: changzhu Reviewed-by: Huang Rui Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 2616f1b59bbd..3ce6f5f123c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1042,17 +1042,10 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) case CHIP_VEGA20: break; case CHIP_RAVEN: - /* Disable GFXOFF on original raven. There are combinations - * of sbios and platforms that are not stable. - */ - if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)) - adev->pm.pp_feature &= ~PP_GFXOFF_MASK; - else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) - &&((adev->gfx.rlc_fw_version != 106 && - adev->gfx.rlc_fw_version < 531) || - (adev->gfx.rlc_fw_version == 53815) || - (adev->gfx.rlc_feature_version < 1) || - !adev->gfx.rlc.is_rlc_v2_1)) + if (!(adev->rev_id >= 0x8 || + adev->pdev->device == 0x15d8) && + (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */ + !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */ adev->pm.pp_feature &= ~PP_GFXOFF_MASK; if (adev->pm.pp_feature & PP_GFXOFF_MASK) -- cgit v1.2.3 From d8c61373e05c9de02525fdbfa08a9f0ffe57579a Mon Sep 17 00:00:00 2001 From: James Zhu Date: Mon, 16 Dec 2019 15:42:43 -0500 Subject: drm/amdgpu/gfx: Replace ARRAY_SIZE with size variable Replace ARRAY_SIZE with size variables to support different ASICs. Signed-off-by: James Zhu Reviewed-by: Yong Zhao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3ce6f5f123c2..f9d228943893 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4060,6 +4060,13 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) unsigned total_size, vgpr_offset, sgpr_offset; u64 gpu_addr; + int compute_dim_x = adev->gfx.config.max_shader_engines * + adev->gfx.config.max_cu_per_sh * + adev->gfx.config.max_sh_per_se; + int sgpr_work_group_size = 5; + int gpr_reg_size = ARRAY_SIZE(vgpr_init_regs); + int sec_ded_counter_reg_size = ARRAY_SIZE(sec_ded_counter_registers); + /* only support when RAS is enabled */ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) return 0; @@ -4069,11 +4076,11 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) return 0; total_size = - ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ total_size += - ((ARRAY_SIZE(sgpr1_init_regs) * 3) + 4 + 5 + 2) * 4; + (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ total_size += - ((ARRAY_SIZE(sgpr2_init_regs) * 3) + 4 + 5 + 2) * 4; + (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ total_size = ALIGN(total_size, 256); vgpr_offset = total_size; total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); @@ -4100,7 +4107,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* VGPR */ /* write the register state for the compute dispatch */ - for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { + for (i = 0; i < gpr_reg_size; i++) { ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) - PACKET3_SET_SH_REG_START; @@ -4116,7 +4123,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 0x40*2; /* x */ + ib.ptr[ib.length_dw++] = compute_dim_x; /* x */ ib.ptr[ib.length_dw++] = 1; /* y */ ib.ptr[ib.length_dw++] = 1; /* z */ ib.ptr[ib.length_dw++] = @@ -4128,7 +4135,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* SGPR1 */ /* write the register state for the compute dispatch */ - for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i++) { + for (i = 0; i < gpr_reg_size; i++) { ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) - PACKET3_SET_SH_REG_START; @@ -4144,7 +4151,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 0xA0*2; /* x */ + ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ ib.ptr[ib.length_dw++] = 1; /* y */ ib.ptr[ib.length_dw++] = 1; /* z */ ib.ptr[ib.length_dw++] = @@ -4156,7 +4163,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* SGPR2 */ /* write the register state for the compute dispatch */ - for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i++) { + for (i = 0; i < gpr_reg_size; i++) { ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) - PACKET3_SET_SH_REG_START; @@ -4172,7 +4179,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 0xA0*2; /* x */ + ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ ib.ptr[ib.length_dw++] = 1; /* y */ ib.ptr[ib.length_dw++] = 1; /* z */ ib.ptr[ib.length_dw++] = @@ -4198,7 +4205,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* read back registers to clear the counters */ mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { + for (i = 0; i < sec_ded_counter_reg_size; i++) { for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { gfx_v9_0_select_se_sh(adev, j, 0x0, k); -- cgit v1.2.3 From 107ab06136658bbbe9b420bb04c656d0b9309476 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Mon, 16 Dec 2019 15:46:27 -0500 Subject: drm/amdgpu/gfx: Add mmCOMPUTE_STATIC_THREAD_MGMT_SE4-7 to support Arcturus Add mmCOMPUTE_STATIC_THREAD_MGMT_SE4-7 to support Arcturus Signed-off-by: James Zhu Reviewed-by: Yong Zhao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 38 +++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f9d228943893..489e4674bde7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3927,42 +3927,54 @@ static const u32 sgpr_init_compute_shader[] = }; static const struct soc15_reg_entry vgpr_init_regs[] = { - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, }; static const struct soc15_reg_entry sgpr1_init_regs[] = { - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, }; static const struct soc15_reg_entry sgpr2_init_regs[] = { - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, }; static const struct soc15_reg_entry sec_ded_counter_registers[] = { @@ -4064,7 +4076,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) adev->gfx.config.max_cu_per_sh * adev->gfx.config.max_sh_per_se; int sgpr_work_group_size = 5; - int gpr_reg_size = ARRAY_SIZE(vgpr_init_regs); + int gpr_reg_size = compute_dim_x / 16 + 6; int sec_ded_counter_reg_size = ARRAY_SIZE(sec_ded_counter_registers); /* only support when RAS is enabled */ -- cgit v1.2.3 From 57cb635bb4d8ed669fe936f8f03d967178a2f245 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Mon, 16 Dec 2019 15:49:11 -0500 Subject: drm/amdgpu/gfx: Add mmSDMA2-7_EDC_COUNTER to support Arcturus Add mmSDMA2-7_EDC_COUNTER to support Arcturus Signed-off-by: James Zhu Reviewed-by: Yong Zhao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 489e4674bde7..6348021ba64a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -48,8 +48,15 @@ #include "amdgpu_ras.h" -#include "sdma0/sdma0_4_0_offset.h" -#include "sdma1/sdma1_4_0_offset.h" +#include "sdma0/sdma0_4_2_offset.h" +#include "sdma1/sdma1_4_2_offset.h" +#include "sdma2/sdma2_4_2_2_offset.h" +#include "sdma3/sdma3_4_2_2_offset.h" +#include "sdma4/sdma4_4_2_2_offset.h" +#include "sdma5/sdma5_4_2_2_offset.h" +#include "sdma6/sdma6_4_2_2_offset.h" +#include "sdma7/sdma7_4_2_2_offset.h" + #define GFX9_NUM_GFX_RINGS 1 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -3926,6 +3933,9 @@ static const u32 sgpr_init_compute_shader[] = 0xbe800080, 0xbf810000, }; +/* When below register arrays changed, please update gpr_reg_size, + and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, + to cover all gfx9 ASICs */ static const struct soc15_reg_entry vgpr_init_regs[] = { { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, @@ -4011,9 +4021,15 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, + { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), 0, 1, 1}, { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(SDMA2, 0, mmSDMA2_EDC_COUNTER), 0, 1, 1}, + { SOC15_REG_ENTRY(SDMA3, 0, mmSDMA3_EDC_COUNTER), 0, 1, 1}, + { SOC15_REG_ENTRY(SDMA4, 0, mmSDMA4_EDC_COUNTER), 0, 1, 1}, + { SOC15_REG_ENTRY(SDMA5, 0, mmSDMA5_EDC_COUNTER), 0, 1, 1}, + { SOC15_REG_ENTRY(SDMA6, 0, mmSDMA6_EDC_COUNTER), 0, 1, 1}, + { SOC15_REG_ENTRY(SDMA7, 0, mmSDMA7_EDC_COUNTER), 0, 1, 1}, }; static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) @@ -4077,7 +4093,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) adev->gfx.config.max_sh_per_se; int sgpr_work_group_size = 5; int gpr_reg_size = compute_dim_x / 16 + 6; - int sec_ded_counter_reg_size = ARRAY_SIZE(sec_ded_counter_registers); + int sec_ded_counter_reg_size = adev->sdma.num_instances + 34; /* only support when RAS is enabled */ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) -- cgit v1.2.3 From 48ccd5ffe5d5be07640be80155404d64e01327c7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 6 Jan 2020 14:55:54 -0500 Subject: drm/amdgpu/gfx: simplify old firmware warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Put it on one line to avoid whitespace issues when printing in the log. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 721f1f7d6cb1..6bc3b937fba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -576,8 +576,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) } if (adev->gfx.cp_fw_write_wait == false) - DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ - GRBM requires 1-cycle delay in cp firmware\n"); + DRM_WARN_ONCE("CP firmware version too old, please update!"); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6348021ba64a..a5492e375f29 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -988,8 +988,7 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) (adev->gfx.mec_feature_version < 46) || (adev->gfx.pfp_fw_version < 0x000000b7) || (adev->gfx.pfp_feature_version < 46)) - DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ - GRBM requires 1-cycle delay in cp firmware\n"); + DRM_WARN_ONCE("CP firmware version too old, please update!"); switch (adev->asic_type) { case CHIP_VEGA10: -- cgit v1.2.3 From 5e62db9df684673f4ce7187c3c02e6a995c5cde9 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 8 Jan 2020 23:28:05 +0800 Subject: drm/amdgpu: read sdma edc counter to clear the counters SDMA edc counter registers were added in gfx edc counters array. When querying gfx error counter in that array, there is no way to differentiate sdma instance number for different asic and then results to NULL pointer access when trying to read sdma register base address for instances greater than 2 on Vega20. In addition, this also results to wrong gfx error counters since it actually added sdma edc counters. Therefore, sdma edc counter registers should be separated from gfx edc counter regsiter array and only get initialized when driver tries to enable sdma ras. Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 11 +---------- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 7 +++++++ 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a5492e375f29..89c04cfcfe12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4021,14 +4021,6 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA2, 0, mmSDMA2_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA3, 0, mmSDMA3_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA4, 0, mmSDMA4_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA5, 0, mmSDMA5_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA6, 0, mmSDMA6_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA7, 0, mmSDMA7_EDC_COUNTER), 0, 1, 1}, }; static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) @@ -4092,7 +4084,6 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) adev->gfx.config.max_sh_per_se; int sgpr_work_group_size = 5; int gpr_reg_size = compute_dim_x / 16 + 6; - int sec_ded_counter_reg_size = adev->sdma.num_instances + 34; /* only support when RAS is enabled */ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) @@ -4232,7 +4223,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* read back registers to clear the counters */ mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < sec_ded_counter_reg_size; i++) { + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { gfx_v9_0_select_se_sh(adev, j, 0x0, k); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 1e0767e88d19..ec9d7873ed42 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1801,6 +1801,13 @@ static int sdma_v4_0_late_init(void *handle) struct ras_ih_if ih_info = { .cb = sdma_v4_0_process_ras_data_cb, }; + int i; + + /* read back edc counter registers to clear the counters */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + for (i = 0; i < adev->sdma.num_instances; i++) + RREG32_SDMA(i, mmSDMA0_EDC_COUNTER); + } return adev->sdma.funcs->ras_late_init(adev, &ih_info); } -- cgit v1.2.3 From d44394a9e18f37a17a70b2d2d81594c66151f523 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Jan 2020 17:33:51 -0500 Subject: drm/amdgpu/gfx9: remove unused sdma headers All of the sdma stuff these were used for moves to the sdma code, so remove them. Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 89c04cfcfe12..9b94e9d15f7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -48,15 +48,6 @@ #include "amdgpu_ras.h" -#include "sdma0/sdma0_4_2_offset.h" -#include "sdma1/sdma1_4_2_offset.h" -#include "sdma2/sdma2_4_2_2_offset.h" -#include "sdma3/sdma3_4_2_2_offset.h" -#include "sdma4/sdma4_4_2_2_offset.h" -#include "sdma5/sdma5_4_2_2_offset.h" -#include "sdma6/sdma6_4_2_2_offset.h" -#include "sdma7/sdma7_4_2_2_offset.h" - #define GFX9_NUM_GFX_RINGS 1 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L -- cgit v1.2.3 From 22d39fe729fdd21acf9c29c5892a71ede4b7c619 Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Thu, 9 Jan 2020 17:41:43 -0600 Subject: drm/amdgpu: Match TC hash settings to DF settings (v2) On Arcturus, data fabric hashing is set by the VBIOS, and affects which addresses map to which memory channels. The gfx core's caches also need to know this mapping, but the hash settings for these these caches is set by the driver. This change queries the DF to understand how the VBIOS configured DF, then matches the TC hash configuration bits to do the same thing. v2: squash in warning fix Signed-off-by: Joseph Greathouse Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/df_v1_7.c | 3 +++ drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 28 ++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 19 +++++++++++++++++++ 3 files changed, 50 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index 03fdeef568d9..d6aca1c08068 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -31,6 +31,9 @@ static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; static void df_v1_7_sw_init(struct amdgpu_device *adev) { + adev->df.hash_status.hash_64k = false; + adev->df.hash_status.hash_2m = false; + adev->df.hash_status.hash_1g = false; } static void df_v1_7_sw_fini(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 7bd29d97adfe..f51326598a8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -262,6 +262,32 @@ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, /* device attr for available perfmon counters */ static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL); +static void df_v3_6_query_hashes(struct amdgpu_device *adev) +{ + u32 tmp; + + adev->df.hash_status.hash_64k = false; + adev->df.hash_status.hash_2m = false; + adev->df.hash_status.hash_1g = false; + + if (adev->asic_type != CHIP_ARCTURUS) + return; + + /* encoding for hash-enabled on Arcturus */ + if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) { + tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl); + adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl64K); + adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl2M); + adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl1G); + } +} + /* init perfmons */ static void df_v3_6_sw_init(struct amdgpu_device *adev) { @@ -273,6 +299,8 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++) adev->df_perfmon_config_assign_mask[i] = 0; + + df_v3_6_query_hashes(adev); } static void df_v3_6_sw_fini(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9b94e9d15f7a..ac4153c235ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3637,6 +3637,23 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) return 0; } +static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) +{ + u32 tmp; + + if (adev->asic_type != CHIP_ARCTURUS) + return; + + tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, + adev->df.hash_status.hash_64k); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, + adev->df.hash_status.hash_2m); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, + adev->df.hash_status.hash_1g); + WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); +} + static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) { if (adev->asic_type != CHIP_ARCTURUS) @@ -3654,6 +3671,8 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_constants_init(adev); + gfx_v9_0_init_tcp_config(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; -- cgit v1.2.3 From f167ea6a14a24308a34c50c28aec1e1ffe666e22 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 9 Jan 2020 15:36:37 -0600 Subject: drm/amdgpu: kiq pm4 function implementation for gfx_v9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Functions implemented from kiq_pm4_funcs struct members for gfx_v9 version. Signed-off-by: Alex Sierra Acked-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 115 ++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ac4153c235ad..1883b09dadfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -739,6 +739,120 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if); +static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, + uint64_t queue_mask) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, + PACKET3_SET_RESOURCES_VMID_MASK(0) | + /* vmid_mask:0* queue_type:0 (KIQ) */ + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); + amdgpu_ring_write(kiq_ring, + lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, + upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ +} + +static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | + /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | + /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | + /* num_queues: must be 1 */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +} + +static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(action) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + + if (action == PREEMPT_QUEUES_NO_UNMAP) { + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, seq); + } else { + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } +} + +static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, + u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_CONTEXT_ID(0) | + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | + PACKET3_QUERY_STATUS_COMMAND(2)); + /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); + amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); + amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); +} + +static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { + .kiq_set_resources = gfx_v9_0_kiq_set_resources, + .kiq_map_queues = gfx_v9_0_kiq_map_queues, + .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, + .kiq_query_status = gfx_v9_0_kiq_query_status, + .set_resources_size = 8, + .map_queues_size = 7, + .unmap_queues_size = 6, + .query_status_size = 7, +}; + +static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) +{ + adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs; +} + static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { @@ -4260,6 +4374,7 @@ static int gfx_v9_0_early_init(void *handle) else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + gfx_v9_0_set_kiq_pm4_funcs(adev); gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); gfx_v9_0_set_gds_init(adev); -- cgit v1.2.3 From 58e508b6be82387d22785b8a4e54e50554a91594 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 9 Jan 2020 16:16:28 -0600 Subject: drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 tlbs invalidate pointer function added to kiq_pm4_funcs struct. This way, tlb flush can be done through kiq member. TLBs invalidatation implemented for gfx9 and gfx10. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 ++++++++++++++ 3 files changed, 33 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8e88e0411662..af4bd279f42f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -76,11 +76,15 @@ struct kiq_pm4_funcs { struct amdgpu_ring *ring, u64 addr, u64 seq); + void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub); /* Packet sizes */ int set_resources_size; int map_queues_size; int unmap_queues_size; int query_status_size; + int invalidate_tlbs_size; }; struct amdgpu_kiq { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 6bc3b937fba2..1a500bbcdd1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -40,6 +40,7 @@ #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "clearstate_gfx10.h" #include "v10_structs.h" @@ -345,15 +346,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); } +static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { .kiq_set_resources = gfx10_kiq_set_resources, .kiq_map_queues = gfx10_kiq_map_queues, .kiq_unmap_queues = gfx10_kiq_unmap_queues, .kiq_query_status = gfx10_kiq_query_status, + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, .query_status_size = 7, + .invalidate_tlbs_size = 12, }; static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1883b09dadfc..84b7c14fdc27 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -837,15 +837,29 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); } +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { .kiq_set_resources = gfx_v9_0_kiq_set_resources, .kiq_map_queues = gfx_v9_0_kiq_map_queues, .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, .kiq_query_status = gfx_v9_0_kiq_query_status, + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, .query_status_size = 7, + .invalidate_tlbs_size = 12, }; static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) -- cgit v1.2.3 From 4f01f1e58e073d35d49ca15460d00671c6244323 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 9 Jan 2020 18:14:06 -0600 Subject: drm/amdgpu: replace kcq enable/disable functions on gfx_v9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] There are HW-indpendent functions that enables and disables kcq. These functions use the kiq_pm4_funcs implementation. [How] Local kcq enable and disable functions removed and replace it by the generic kcq enable under amdgpu_gfx Signed-off-by: Alex Sierra Acked-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 102 +--------------------------------- 1 file changed, 2 insertions(+), 100 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 84b7c14fdc27..388a38febb4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3234,74 +3234,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } -static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) -{ - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - uint64_t queue_mask = 0; - int r, i; - - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { - if (!test_bit(i, adev->gfx.mec.queue_bitmap)) - continue; - - /* This situation may be hit in the future if a new HW - * generation exposes more than 64 queues. If so, the - * definition of queue_mask needs updating */ - if (WARN_ON(i >= (sizeof(queue_mask)*8))) { - DRM_ERROR("Invalid KCQ enabled: %d\n", i); - break; - } - - queue_mask |= (1ull << i); - } - - r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - return r; - } - - /* set resources */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | - PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ - amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* oac mask */ - amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ - PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ - PACKET3_MAP_QUEUES_QUEUE(ring->queue) | - PACKET3_MAP_QUEUES_PIPE(ring->pipe) | - PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | - PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ - PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ - PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ - amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); - } - - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ enable failed\n"); - - return r; -} - static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -3708,7 +3640,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; } - r = gfx_v9_0_kiq_kcq_enable(adev); + r = amdgpu_gfx_enable_kcq(adev); done: return r; } @@ -3812,36 +3744,6 @@ static int gfx_v9_0_hw_init(void *handle) return r; } -static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) -{ - int r, i; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - - r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); - if (r) - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ - PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | - PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | - PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); - amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - } - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ disable failed\n"); - - return r; -} - static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3853,7 +3755,7 @@ static int gfx_v9_0_hw_fini(void *handle) /* DF freeze and kcq disable will fail */ if (!amdgpu_ras_intr_triggered()) /* disable KCQ to avoid CPC touch memory not valid anymore */ - gfx_v9_0_kcq_disable(adev); + amdgpu_gfx_disable_kcq(adev); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); -- cgit v1.2.3 From 0e5b7a952818e20a6cd0f90096e7c968616b1418 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Fri, 10 Jan 2020 11:40:36 +0800 Subject: drm/amdgpu: only set cp active field for kiq queue The mec ucode will set the CP_HQD_ACTIVE bit while the queue is mapped by MAP_QUEUES packet. So we only need set cp active field for kiq queue. Signed-off-by: Huang Rui Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++++-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 7 +++++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 7 +++++-- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 468f1b1d41e7..874f641de281 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3337,8 +3337,11 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index cfc1403fc855..46f0533ba43f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4558,8 +4558,11 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 388a38febb4e..46ab46757b25 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3370,8 +3370,11 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } -- cgit v1.2.3