diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 570 |
1 files changed, 337 insertions, 233 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ef00d14f8645..6d7baf59d6e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -80,9 +80,24 @@ MODULE_FIRMWARE("amdgpu/raven_mec.bin"); MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); +MODULE_FIRMWARE("amdgpu/picasso_ce.bin"); +MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); +MODULE_FIRMWARE("amdgpu/picasso_me.bin"); +MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); +MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); +MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); +MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); +MODULE_FIRMWARE("amdgpu/raven2_me.bin"); +MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); +MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); +MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_9_0[] = { - SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), @@ -119,7 +134,10 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = @@ -159,7 +177,10 @@ static const struct soc15_reg_golden golden_settings_gc_9_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = @@ -173,6 +194,29 @@ static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) }; +static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), +}; + static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), @@ -210,7 +254,10 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) }; static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = @@ -240,6 +287,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 +#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); @@ -279,12 +327,16 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) ARRAY_SIZE(golden_settings_gc_9_0_vg20)); break; case CHIP_RAVEN: - soc15_program_register_sequence(adev, - golden_settings_gc_9_1, - ARRAY_SIZE(golden_settings_gc_9_1)); - soc15_program_register_sequence(adev, - golden_settings_gc_9_1_rv1, - ARRAY_SIZE(golden_settings_gc_9_1_rv1)); + soc15_program_register_sequence(adev, golden_settings_gc_9_1, + ARRAY_SIZE(golden_settings_gc_9_1)); + if (adev->rev_id >= 8) + soc15_program_register_sequence(adev, + golden_settings_gc_9_1_rv2, + ARRAY_SIZE(golden_settings_gc_9_1_rv2)); + else + soc15_program_register_sequence(adev, + golden_settings_gc_9_1_rv1, + ARRAY_SIZE(golden_settings_gc_9_1_rv1)); break; default: break; @@ -482,6 +534,61 @@ static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); } +static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) +{ + adev->gfx.me_fw_write_wait = false; + adev->gfx.mec_fw_write_wait = false; + + switch (adev->asic_type) { + case CHIP_VEGA10: + if ((adev->gfx.me_fw_version >= 0x0000009c) && + (adev->gfx.me_feature_version >= 42) && + (adev->gfx.pfp_fw_version >= 0x000000b1) && + (adev->gfx.pfp_feature_version >= 42)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x00000193) && + (adev->gfx.mec_feature_version >= 42)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_VEGA12: + if ((adev->gfx.me_fw_version >= 0x0000009c) && + (adev->gfx.me_feature_version >= 44) && + (adev->gfx.pfp_fw_version >= 0x000000b2) && + (adev->gfx.pfp_feature_version >= 44)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x00000196) && + (adev->gfx.mec_feature_version >= 44)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_VEGA20: + if ((adev->gfx.me_fw_version >= 0x0000009c) && + (adev->gfx.me_feature_version >= 44) && + (adev->gfx.pfp_fw_version >= 0x000000b2) && + (adev->gfx.pfp_feature_version >= 44)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x00000197) && + (adev->gfx.mec_feature_version >= 44)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_RAVEN: + if ((adev->gfx.me_fw_version >= 0x0000009c) && + (adev->gfx.me_feature_version >= 42) && + (adev->gfx.pfp_fw_version >= 0x000000b1) && + (adev->gfx.pfp_feature_version >= 42)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x00000192) && + (adev->gfx.mec_feature_version >= 42)) + adev->gfx.mec_fw_write_wait = true; + break; + default: + break; + } +} + static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -509,7 +616,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) chip_name = "vega20"; break; case CHIP_RAVEN: - chip_name = "raven"; + if (adev->rev_id >= 8) + chip_name = "raven2"; + else if (adev->pdev->device == 0x15d8) + chip_name = "picasso"; + else + chip_name = "raven"; break; default: BUG(); @@ -590,14 +702,14 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) tmp = (unsigned int *)((uintptr_t)rlc_hdr + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; tmp = (unsigned int *)((uintptr_t)rlc_hdr + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); if (adev->gfx.rlc.is_rlc_v2_1) @@ -716,6 +828,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) } out: + gfx_v9_0_check_fw_write_wait(adev); if (err) { dev_err(adev->dev, "gfx9: Failed to load firmware \"%s\"\n", @@ -805,6 +918,50 @@ static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, buffer[count++] = cpu_to_le32(0); } +static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) +{ + struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; + uint32_t pg_always_on_cu_num = 2; + uint32_t always_on_cu_num; + uint32_t i, j, k; + uint32_t mask, cu_bitmap, counter; + + if (adev->flags & AMD_IS_APU) + always_on_cu_num = 4; + else if (adev->asic_type == CHIP_VEGA12) + always_on_cu_num = 8; + else + always_on_cu_num = 12; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + mask = 1; + cu_bitmap = 0; + counter = 0; + gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { + if (cu_info->bitmap[i][j] & mask) { + if (counter == pg_always_on_cu_num) + WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); + if (counter < always_on_cu_num) + cu_bitmap |= mask; + else + break; + counter++; + } + mask <<= 1; + } + + WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); + cu_info->ao_cu_bitmap[i][j] = cu_bitmap; + } + } + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); +} + static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) { uint32_t data; @@ -838,8 +995,59 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) data |= 0x00C00000; WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); - /* set RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF */ - WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, 0xFFF); + /* + * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), + * programmed in gfx_v9_0_init_always_on_cu_mask() + */ + + /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, + * but used for RLC_LB_CNTL configuration */ + data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; + data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); + data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); + WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); + mutex_unlock(&adev->grbm_idx_mutex); + + gfx_v9_0_init_always_on_cu_mask(adev); +} + +static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) +{ + uint32_t data; + + /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); + + /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ + WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); + + /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ + WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); + + mutex_lock(&adev->grbm_idx_mutex); + /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); + + /* set mmRLC_LB_PARAMS = 0x003F_1006 */ + data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); + data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); + data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); + WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); + + /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ + data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); + data &= 0x0000FFFF; + data |= 0x00C00000; + WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); + + /* + * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), + * programmed in gfx_v9_0_init_always_on_cu_mask() + */ /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, * but used for RLC_LB_CNTL configuration */ @@ -848,6 +1056,8 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); mutex_unlock(&adev->grbm_idx_mutex); + + gfx_v9_0_init_always_on_cu_mask(adev); } static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) @@ -981,8 +1191,17 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) rv_init_cp_jump_table(adev); amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); + } + switch (adev->asic_type) { + case CHIP_RAVEN: gfx_v9_0_init_lbpw(adev); + break; + case CHIP_VEGA20: + gfx_v9_4_init_lbpw(adev); + break; + default: + break; } return 0; @@ -1210,7 +1429,10 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; - gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; + if (adev->rev_id >= 8) + gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; + else + gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; break; default: BUG(); @@ -1421,8 +1643,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) gfx_v9_0_write_data_to_reg(ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), (adev->gds.mem.total_size + - adev->gfx.ngg.gds_reserve_size) >> - AMDGPU_GDS_SHIFT); + adev->gfx.ngg.gds_reserve_size)); amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | @@ -1500,11 +1721,6 @@ static int gfx_v9_0_sw_init(void *handle) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; - /* KIQ event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_IB2_INTERRUPT_PKT, &adev->gfx.kiq.irq); - if (r) - return r; - /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); if (r) @@ -1595,25 +1811,6 @@ static int gfx_v9_0_sw_init(void *handle) if (r) return r; - /* reserve GDS, GWS and OA resource for gfx */ - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, - &adev->gds.gds_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, - &adev->gds.gws_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, - &adev->gds.oa_gfx_bo, NULL, NULL); - if (r) - return r; - adev->gfx.ce_ram_size = 0x8000; r = gfx_v9_0_gpu_early_init(adev); @@ -1761,7 +1958,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); } -static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) +static void gfx_v9_0_constants_init(struct amdgpu_device *adev) { u32 tmp; int i; @@ -2317,7 +2514,8 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) return r; } - if (adev->asic_type == CHIP_RAVEN) { + if (adev->asic_type == CHIP_RAVEN || + adev->asic_type == CHIP_VEGA20) { if (amdgpu_lbpw != 0) gfx_v9_0_enable_lbpw(adev, true); else @@ -2610,7 +2808,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) { struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - uint32_t scratch, tmp = 0; uint64_t queue_mask = 0; int r, i; @@ -2629,17 +2826,9 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) queue_mask |= (1ull << i); } - r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("Failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - - r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 11); + r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); if (r) { DRM_ERROR("Failed to lock KIQ (%d).\n", r); - amdgpu_gfx_scratch_free(adev, scratch); return r; } @@ -2676,24 +2865,12 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); } - /* write to scratch for completion */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); - amdgpu_ring_write(kiq_ring, 0xDEADBEEF); - amdgpu_ring_commit(kiq_ring); - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - if (i >= adev->usec_timeout) { - DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); - r = -EINVAL; + r = amdgpu_ring_test_ring(kiq_ring); + if (r) { + DRM_ERROR("KCQ enable failed\n"); + kiq_ring->ready = false; } - amdgpu_gfx_scratch_free(adev, scratch); return r; } @@ -3026,7 +3203,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) struct v9_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { + if (!adev->in_gpu_reset && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -3055,26 +3232,33 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; - - gfx_v9_0_cp_compute_enable(adev, true); + struct amdgpu_ring *ring; + int r; ring = &adev->gfx.kiq.ring; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - goto done; + return r; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v9_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } + if (unlikely(r != 0)) + return r; + + gfx_v9_0_kiq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); - if (r) - goto done; + ring->ready = true; + return 0; +} + +static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int r = 0, i; + + gfx_v9_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -3117,11 +3301,15 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) return r; } + r = gfx_v9_0_kiq_resume(adev); + if (r) + return r; + r = gfx_v9_0_cp_gfx_resume(adev); if (r) return r; - r = gfx_v9_0_kiq_resume(adev); + r = gfx_v9_0_kcq_resume(adev); if (r) return r; @@ -3132,12 +3320,6 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) return r; } - ring = &adev->gfx.kiq.ring; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; - for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -3165,7 +3347,7 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_init_golden_registers(adev); - gfx_v9_0_gpu_init(adev); + gfx_v9_0_constants_init(adev); r = gfx_v9_0_csb_vram_pin(adev); if (r) @@ -3186,71 +3368,45 @@ static int gfx_v9_0_hw_init(void *handle) return r; } -static int gfx_v9_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring) +static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) { - struct amdgpu_device *adev = kiq_ring->adev; - uint32_t scratch, tmp = 0; int r, i; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("Failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - - r = amdgpu_ring_alloc(kiq_ring, 10); - if (r) { + r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); + if (r) DRM_ERROR("Failed to lock KIQ (%d).\n", r); - amdgpu_gfx_scratch_free(adev, scratch); - return r; - } - /* unmap queues */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); - amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - /* write to scratch for completion */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); - amdgpu_ring_write(kiq_ring, 0xDEADBEEF); - amdgpu_ring_commit(kiq_ring); - - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - if (i >= adev->usec_timeout) { - DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); - r = -EINVAL; + amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); } - amdgpu_gfx_scratch_free(adev, scratch); + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + DRM_ERROR("KCQ disable failed\n"); + return r; } static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i; - - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, - AMD_PG_STATE_UNGATE); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); /* disable KCQ to avoid CPC touch memory not valid anymore */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) - gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); + gfx_v9_0_kcq_disable(adev); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); @@ -3266,7 +3422,7 @@ static int gfx_v9_0_hw_fini(void *handle) /* Use deinitialize sequence from CAIL when unbinding device from driver, * otherwise KIQ is hanging when binding back */ - if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { + if (!adev->in_gpu_reset && !adev->in_suspend) { mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, adev->gfx.kiq.ring.me, adev->gfx.kiq.ring.pipe, @@ -3286,20 +3442,12 @@ static int gfx_v9_0_hw_fini(void *handle) static int gfx_v9_0_suspend(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - adev->gfx.in_suspend = true; - return gfx_v9_0_hw_fini(adev); + return gfx_v9_0_hw_fini(handle); } static int gfx_v9_0_resume(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; - - r = gfx_v9_0_hw_init(adev); - adev->gfx.in_suspend = false; - return r; + return gfx_v9_0_hw_init(handle); } static bool gfx_v9_0_is_idle(void *handle) @@ -3408,15 +3556,6 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; - gds_base = gds_base >> AMDGPU_GDS_SHIFT; - gds_size = gds_size >> AMDGPU_GDS_SHIFT; - - gws_base = gws_base >> AMDGPU_GWS_SHIFT; - gws_size = gws_size >> AMDGPU_GWS_SHIFT; - - oa_base = oa_base >> AMDGPU_OA_SHIFT; - oa_size = oa_size >> AMDGPU_OA_SHIFT; - /* GDS Base */ gfx_v9_0_write_data_to_reg(ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, @@ -3763,6 +3902,10 @@ static int gfx_v9_0_set_powergating_state(void *handle, switch (adev->asic_type) { case CHIP_RAVEN: + if (!enable) { + amdgpu_gfx_off_ctrl(adev, false); + cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); + } if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); @@ -3782,14 +3925,16 @@ static int gfx_v9_0_set_powergating_state(void *handle, /* update mgcg state */ gfx_v9_0_update_gfx_mg_power_gating(adev, enable); - /* set gfx off through smu */ - if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true); + if (enable) + amdgpu_gfx_off_ctrl(adev, true); break; case CHIP_VEGA12: - /* set gfx off through smu */ - if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true); + if (!enable) { + amdgpu_gfx_off_ctrl(adev, false); + cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); + } else { + amdgpu_gfx_off_ctrl(adev, true); + } break; default: break; @@ -4350,8 +4495,11 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, uint32_t ref, uint32_t mask) { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + struct amdgpu_device *adev = ring->adev; + bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? + adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; - if (amdgpu_sriov_vf(ring->adev)) + if (fw_version_ok) gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, ref, mask, 0x20); else @@ -4359,6 +4507,18 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask); } +static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t value = 0; + + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + WREG32(mmSQ_CMD, value); +} + static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { @@ -4553,68 +4713,6 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } -static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *src, - unsigned int type, - enum amdgpu_interrupt_state state) -{ - uint32_t tmp, target; - struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - - if (ring->me == 1) - target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); - else - target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL); - target += ring->pipe; - - switch (type) { - case AMDGPU_CP_KIQ_IRQ_DRIVER0: - if (state == AMDGPU_IRQ_STATE_DISABLE) { - tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); - tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, - GENERIC2_INT_ENABLE, 0); - WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); - - tmp = RREG32(target); - tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, - GENERIC2_INT_ENABLE, 0); - WREG32(target, tmp); - } else { - tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); - tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, - GENERIC2_INT_ENABLE, 1); - WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); - - tmp = RREG32(target); - tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, - GENERIC2_INT_ENABLE, 1); - WREG32(target, tmp); - } - break; - default: - BUG(); /* kiq only support GENERIC2_INT now */ - break; - } - return 0; -} - -static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - u8 me_id, pipe_id, queue_id; - struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - - me_id = (entry->ring_id & 0x0c) >> 2; - pipe_id = (entry->ring_id & 0x03) >> 0; - queue_id = (entry->ring_id & 0x70) >> 4; - DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", - me_id, pipe_id, queue_id); - - amdgpu_fence_process(ring); - return 0; -} - static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -4681,6 +4779,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v9_0_ring_soft_recovery, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -4762,11 +4861,6 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; } -static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = { - .set = gfx_v9_0_kiq_set_interrupt_state, - .process = gfx_v9_0_kiq_irq, -}; - static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { .set = gfx_v9_0_set_eop_interrupt_state, .process = gfx_v9_0_eop_irq, @@ -4792,9 +4886,6 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; - - adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; - adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs; } static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) @@ -4814,7 +4905,20 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_VEGA20: + adev->gds.mem.total_size = 0x10000; + break; + case CHIP_RAVEN: + adev->gds.mem.total_size = 0x1000; + break; + default: + adev->gds.mem.total_size = 0x10000; + break; + } + adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; |