diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 770 |
1 files changed, 404 insertions, 366 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 21c086e02e7b..879a94bbfe12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -26,15 +26,18 @@ #include "amdgpu_gfx.h" #include "amdgpu_ucode.h" #include "si/clearstate_si.h" -#include "si/sid.h" - -#define GFX6_NUM_GFX_RINGS 1 -#define GFX6_NUM_COMPUTE_RINGS 2 -#define STATIC_PER_CU_PG_ENABLE (1 << 3) -#define DYN_PER_CU_PG_ENABLE (1 << 2) -#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 -#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D - +#include "bif/bif_3_0_d.h" +#include "bif/bif_3_0_sh_mask.h" +#include "oss/oss_1_0_d.h" +#include "oss/oss_1_0_sh_mask.h" +#include "gca/gfx_6_0_d.h" +#include "gca/gfx_6_0_sh_mask.h" +#include "gmc/gmc_6_0_d.h" +#include "gmc/gmc_6_0_sh_mask.h" +#include "dce/dce_6_0_d.h" +#include "dce/dce_6_0_sh_mask.h" +#include "gca/gfx_7_2_enum.h" +#include "si_enums.h" static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev); @@ -70,6 +73,15 @@ static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *bu //static void gfx_v6_0_init_cp_pg_table(struct amdgpu_device *adev); static void gfx_v6_0_init_pg(struct amdgpu_device *adev); +#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) +#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) +#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT) +#define MICRO_TILE_MODE(x) ((x) << 0) +#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT) +#define BANK_WIDTH(x) ((x) << 14) +#define BANK_HEIGHT(x) ((x) << 16) +#define MACRO_TILE_ASPECT(x) ((x) << 18) +#define NUM_BANKS(x) ((x) << 20) static const u32 verde_rlc_save_restore_register_list[] = { @@ -400,8 +412,8 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) } if (adev->asic_type == CHIP_VERDE || - adev->asic_type == CHIP_OLAND || - adev->asic_type == CHIP_HAINAN) { + adev->asic_type == CHIP_OLAND || + adev->asic_type == CHIP_HAINAN) { for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { switch (reg_offset) { case 0: @@ -414,7 +426,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); break; - case 1: + case 1: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -434,7 +446,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); break; - case 3: + case 3: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -444,7 +456,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); break; - case 4: + case 4: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -454,7 +466,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 5: + case 5: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -464,7 +476,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 6: + case 6: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -474,7 +486,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 7: + case 7: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -484,7 +496,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); break; - case 8: + case 8: gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -494,7 +506,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 9: + case 9: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -504,7 +516,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 10: + case 10: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -514,7 +526,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); break; - case 11: + case 11: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -524,7 +536,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 12: + case 12: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -534,7 +546,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 13: + case 13: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -544,7 +556,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 14: + case 14: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -554,7 +566,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 15: + case 15: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -564,7 +576,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 16: + case 16: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -574,7 +586,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 17: + case 17: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | @@ -584,7 +596,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 21: + case 21: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | @@ -594,7 +606,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 22: + case 22: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | @@ -604,7 +616,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); break; - case 23: + case 23: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | @@ -614,7 +626,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 24: + case 24: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | @@ -624,7 +636,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); break; - case 25: + case 25: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | @@ -639,7 +651,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) break; } adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(GB_TILE_MODE0 + reg_offset, gb_tile_moden); + WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); } } else if ((adev->asic_type == CHIP_TAHITI) || (adev->asic_type == CHIP_PITCAIRN)) { for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { @@ -879,7 +891,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) break; } adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(GB_TILE_MODE0 + reg_offset, gb_tile_moden); + WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); } } else{ @@ -894,19 +906,23 @@ static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 data; if (instance == 0xffffffff) - data = INSTANCE_BROADCAST_WRITES; + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); else - data = INSTANCE_INDEX(instance); + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) - data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES; + data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | + GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK; else if (se_num == 0xffffffff) - data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num); + data |= GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK | + (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT); else if (sh_num == 0xffffffff) - data |= SH_BROADCAST_WRITES | SE_INDEX(se_num); + data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | + (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT); else - data |= SH_INDEX(sh_num) | SE_INDEX(se_num); - WREG32(GRBM_GFX_INDEX, data); + data |= (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT) | + (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT); + WREG32(mmGRBM_GFX_INDEX, data); } static u32 gfx_v6_0_create_bitmask(u32 bit_width) @@ -920,11 +936,11 @@ static u32 gfx_v6_0_get_rb_disabled(struct amdgpu_device *adev, { u32 data, mask; - data = RREG32(CC_RB_BACKEND_DISABLE); - data &= BACKEND_DISABLE_MASK; - data |= RREG32(GC_USER_RB_BACKEND_DISABLE); + data = RREG32(mmCC_RB_BACKEND_DISABLE); + data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; + data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); - data >>= BACKEND_DISABLE_SHIFT; + data >>= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; mask = gfx_v6_0_create_bitmask(max_rb_num_per_se / sh_per_se); @@ -936,14 +952,23 @@ static void gfx_v6_0_raster_config(struct amdgpu_device *adev, u32 *rconf) switch (adev->asic_type) { case CHIP_TAHITI: case CHIP_PITCAIRN: - *rconf |= RB_XSEL2(2) | RB_XSEL | PKR_MAP(2) | PKR_YSEL(1) | - SE_MAP(2) | SE_XSEL(2) | SE_YSEL(2); + *rconf |= + (2 << PA_SC_RASTER_CONFIG__RB_XSEL2__SHIFT) | + (1 << PA_SC_RASTER_CONFIG__RB_XSEL__SHIFT) | + (2 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT) | + (1 << PA_SC_RASTER_CONFIG__PKR_YSEL__SHIFT) | + (2 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT) | + (2 << PA_SC_RASTER_CONFIG__SE_XSEL__SHIFT) | + (2 << PA_SC_RASTER_CONFIG__SE_YSEL__SHIFT); break; case CHIP_VERDE: - *rconf |= RB_XSEL | PKR_MAP(2) | PKR_YSEL(1); + *rconf |= + (1 << PA_SC_RASTER_CONFIG__RB_XSEL__SHIFT) | + (2 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT) | + (1 << PA_SC_RASTER_CONFIG__PKR_YSEL__SHIFT); break; case CHIP_OLAND: - *rconf |= RB_YSEL; + *rconf |= (1 << PA_SC_RASTER_CONFIG__RB_YSEL__SHIFT); break; case CHIP_HAINAN: *rconf |= 0x0; @@ -981,24 +1006,24 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, int idx = (se / 2) * 2; if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { - raster_config_se &= ~SE_MAP_MASK; + raster_config_se &= ~PA_SC_RASTER_CONFIG__SE_MAP_MASK; if (!se_mask[idx]) { - raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); + raster_config_se |= RASTER_CONFIG_SE_MAP_3 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; } else { - raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); + raster_config_se |= RASTER_CONFIG_SE_MAP_0 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; } } pkr0_mask &= rb_mask; pkr1_mask &= rb_mask; if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { - raster_config_se &= ~PKR_MAP_MASK; + raster_config_se &= ~PA_SC_RASTER_CONFIG__PKR_MAP_MASK; if (!pkr0_mask) { - raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); + raster_config_se |= RASTER_CONFIG_PKR_MAP_3 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; } else { - raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); + raster_config_se |= RASTER_CONFIG_PKR_MAP_0 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; } } @@ -1009,14 +1034,14 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, rb0_mask &= rb_mask; rb1_mask &= rb_mask; if (!rb0_mask || !rb1_mask) { - raster_config_se &= ~RB_MAP_PKR0_MASK; + raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR0_MASK; if (!rb0_mask) { raster_config_se |= - RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); + RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; } else { raster_config_se |= - RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); + RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; } } @@ -1026,14 +1051,14 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, rb0_mask &= rb_mask; rb1_mask &= rb_mask; if (!rb0_mask || !rb1_mask) { - raster_config_se &= ~RB_MAP_PKR1_MASK; + raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR1_MASK; if (!rb0_mask) { raster_config_se |= - RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); + RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; } else { raster_config_se |= - RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); + RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; } } } @@ -1041,7 +1066,7 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, /* GRBM_GFX_INDEX has a different offset on SI */ gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); - WREG32(PA_SC_RASTER_CONFIG, raster_config_se); + WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); } /* GRBM_GFX_INDEX has a different offset on SI */ @@ -1063,7 +1088,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev, for (j = 0; j < sh_per_se; j++) { gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); data = gfx_v6_0_get_rb_disabled(adev, max_rb_num_per_se, sh_per_se); - disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH); + disabled_rbs |= data << ((i * sh_per_se + j) * 2); } } gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); @@ -1105,7 +1130,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev, if (!adev->gfx.config.backend_enable_mask || adev->gfx.config.num_rbs >= num_rb_pipes) - WREG32(PA_SC_RASTER_CONFIG, data); + WREG32(mmPA_SC_RASTER_CONFIG, data); else gfx_v6_0_write_harvested_raster_configs(adev, data, adev->gfx.config.backend_enable_mask, @@ -1124,11 +1149,11 @@ static u32 gfx_v6_0_get_cu_enabled(struct amdgpu_device *adev, u32 cu_per_sh) { u32 data, mask; - data = RREG32(CC_GC_SHADER_ARRAY_CONFIG); - data &= INACTIVE_CUS_MASK; - data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG); + data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); + data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; + data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); - data >>= INACTIVE_CUS_SHIFT; + data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; mask = gfx_v6_0_create_bitmask(cu_per_sh); @@ -1148,7 +1173,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev, for (i = 0; i < se_num; i++) { for (j = 0; j < sh_per_se; j++) { gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); - data = RREG32(SPI_STATIC_THREAD_MGMT_3); + data = RREG32(mmSPI_STATIC_THREAD_MGMT_3); active_cu = gfx_v6_0_get_cu_enabled(adev, cu_per_sh); mask = 1; @@ -1156,7 +1181,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev, mask <<= k; if (active_cu & mask) { data &= ~mask; - WREG32(SPI_STATIC_THREAD_MGMT_3, data); + WREG32(mmSPI_STATIC_THREAD_MGMT_3, data); break; } } @@ -1209,7 +1234,6 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN; break; - case CHIP_VERDE: adev->gfx.config.max_shader_engines = 1; adev->gfx.config.max_tile_pipes = 4; @@ -1266,18 +1290,18 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) break; } - WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); - WREG32(SRBM_INT_CNTL, 1); - WREG32(SRBM_INT_ACK, 1); + WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT)); + WREG32(mmSRBM_INT_CNTL, 1); + WREG32(mmSRBM_INT_ACK, 1); - WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN); + WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); - mc_shared_chmap = RREG32(MC_SHARED_CHMAP); - mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); + mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); + mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; adev->gfx.config.mem_max_burst_length_bytes = 256; - tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; + tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT; adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; if (adev->gfx.config.mem_row_size_in_kb > 4) adev->gfx.config.mem_row_size_in_kb = 4; @@ -1285,32 +1309,33 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) adev->gfx.config.num_gpus = 1; adev->gfx.config.multi_gpu_tile_size = 64; - gb_addr_config &= ~ROW_SIZE_MASK; + gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK; switch (adev->gfx.config.mem_row_size_in_kb) { case 1: default: - gb_addr_config |= ROW_SIZE(0); + gb_addr_config |= 0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT; break; case 2: - gb_addr_config |= ROW_SIZE(1); + gb_addr_config |= 1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT; break; case 4: - gb_addr_config |= ROW_SIZE(2); + gb_addr_config |= 2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT; break; } adev->gfx.config.gb_addr_config = gb_addr_config; - WREG32(GB_ADDR_CONFIG, gb_addr_config); - WREG32(DMIF_ADDR_CONFIG, gb_addr_config); - WREG32(DMIF_ADDR_CALC, gb_addr_config); - WREG32(HDP_ADDR_CONFIG, gb_addr_config); - WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config); - WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config); + WREG32(mmGB_ADDR_CONFIG, gb_addr_config); + WREG32(mmDMIF_ADDR_CONFIG, gb_addr_config); + WREG32(mmDMIF_ADDR_CALC, gb_addr_config); + WREG32(mmHDP_ADDR_CONFIG, gb_addr_config); + WREG32(mmDMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config); + WREG32(mmDMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config); + #if 0 if (adev->has_uvd) { - WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config); - WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config); - WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config); + WREG32(mmUVD_UDEC_ADDR_CONFIG, gb_addr_config); + WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, gb_addr_config); + WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config); } #endif gfx_v6_0_tiling_mode_table_init(adev); @@ -1325,45 +1350,48 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) gfx_v6_0_get_cu_info(adev); - WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | - ROQ_IB2_START(0x2b))); - WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60)); + WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | + (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); + WREG32(mmCP_MEQ_THRESHOLDS, (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | + (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); - sx_debug_1 = RREG32(SX_DEBUG_1); - WREG32(SX_DEBUG_1, sx_debug_1); + sx_debug_1 = RREG32(mmSX_DEBUG_1); + WREG32(mmSX_DEBUG_1, sx_debug_1); - WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); + WREG32(mmSPI_CONFIG_CNTL_1, (4 << SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT)); - WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(adev->gfx.config.sc_prim_fifo_size_frontend) | - SC_BACKEND_PRIM_FIFO_SIZE(adev->gfx.config.sc_prim_fifo_size_backend) | - SC_HIZ_TILE_FIFO_SIZE(adev->gfx.config.sc_hiz_tile_fifo_size) | - SC_EARLYZ_TILE_FIFO_SIZE(adev->gfx.config.sc_earlyz_tile_fifo_size))); + WREG32(mmPA_SC_FIFO_SIZE, ((adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | + (adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | + (adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | + (adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT))); - WREG32(VGT_NUM_INSTANCES, 1); - WREG32(CP_PERFMON_CNTL, 0); - WREG32(SQ_CONFIG, 0); - WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) | - FORCE_EOV_MAX_REZ_CNT(255))); + WREG32(mmVGT_NUM_INSTANCES, 1); + WREG32(mmCP_PERFMON_CNTL, 0); + WREG32(mmSQ_CONFIG, 0); + WREG32(mmPA_SC_FORCE_EOV_MAX_CNTS, ((4095 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT) | + (255 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT))); - WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) | - AUTO_INVLD_EN(ES_AND_GS_AUTO)); + WREG32(mmVGT_CACHE_INVALIDATION, + (VC_AND_TC << VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT) | + (ES_AND_GS_AUTO << VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT)); - WREG32(VGT_GS_VERTEX_REUSE, 16); - WREG32(PA_SC_LINE_STIPPLE_STATE, 0); + WREG32(mmVGT_GS_VERTEX_REUSE, 16); + WREG32(mmPA_SC_LINE_STIPPLE_STATE, 0); - WREG32(CB_PERFCOUNTER0_SELECT0, 0); - WREG32(CB_PERFCOUNTER0_SELECT1, 0); - WREG32(CB_PERFCOUNTER1_SELECT0, 0); - WREG32(CB_PERFCOUNTER1_SELECT1, 0); - WREG32(CB_PERFCOUNTER2_SELECT0, 0); - WREG32(CB_PERFCOUNTER2_SELECT1, 0); - WREG32(CB_PERFCOUNTER3_SELECT0, 0); - WREG32(CB_PERFCOUNTER3_SELECT1, 0); + WREG32(mmCB_PERFCOUNTER0_SELECT0, 0); + WREG32(mmCB_PERFCOUNTER0_SELECT1, 0); + WREG32(mmCB_PERFCOUNTER1_SELECT0, 0); + WREG32(mmCB_PERFCOUNTER1_SELECT1, 0); + WREG32(mmCB_PERFCOUNTER2_SELECT0, 0); + WREG32(mmCB_PERFCOUNTER2_SELECT1, 0); + WREG32(mmCB_PERFCOUNTER3_SELECT0, 0); + WREG32(mmCB_PERFCOUNTER3_SELECT1, 0); - hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL); - WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl); + hdp_host_path_cntl = RREG32(mmHDP_HOST_PATH_CNTL); + WREG32(mmHDP_HOST_PATH_CNTL, hdp_host_path_cntl); - WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3)); + WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK | + (3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT)); udelay(50); } @@ -1374,7 +1402,7 @@ static void gfx_v6_0_scratch_init(struct amdgpu_device *adev) int i; adev->gfx.scratch.num_reg = 7; - adev->gfx.scratch.reg_base = SCRATCH_REG0; + adev->gfx.scratch.reg_base = mmSCRATCH_REG0; for (i = 0; i < adev->gfx.scratch.num_reg; i++) { adev->gfx.scratch.free[i] = true; adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i; @@ -1430,11 +1458,18 @@ static void gfx_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL); + amdgpu_ring_write(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0x1); } +static void gfx_v6_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); + amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | + EVENT_INDEX(0)); +} + /** * gfx_v6_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp * @@ -1448,7 +1483,7 @@ static void gfx_v6_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, HDP_DEBUG0); + amdgpu_ring_write(ring, mmHDP_DEBUG0); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0x1); } @@ -1460,7 +1495,7 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; /* flush read cache over gart */ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - amdgpu_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START)); + amdgpu_ring_write(ring, (mmCP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START)); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | @@ -1475,7 +1510,8 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5)); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | - DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); + ((write64bit ? 2 : 1) << CP_EOP_DONE_DATA_CNTL__DATA_SEL__SHIFT) | + ((int_sel ? 2 : 0) << CP_EOP_DONE_DATA_CNTL__INT_SEL__SHIFT)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); } @@ -1578,11 +1614,13 @@ err1: static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) { int i; - if (enable) - WREG32(CP_ME_CNTL, 0); - else { - WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT)); - WREG32(SCRATCH_UMSK, 0); + if (enable) { + WREG32(mmCP_ME_CNTL, 0); + } else { + WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | + CP_ME_CNTL__PFP_HALT_MASK | + CP_ME_CNTL__CE_HALT_MASK)); + WREG32(mmSCRATCH_UMSK, 0); for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].ready = false; for (i = 0; i < adev->gfx.num_compute_rings; i++) @@ -1616,34 +1654,33 @@ static int gfx_v6_0_cp_gfx_load_microcode(struct amdgpu_device *adev) fw_data = (const __le32 *) (adev->gfx.pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; - WREG32(CP_PFP_UCODE_ADDR, 0); + WREG32(mmCP_PFP_UCODE_ADDR, 0); for (i = 0; i < fw_size; i++) - WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); - WREG32(CP_PFP_UCODE_ADDR, 0); + WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); + WREG32(mmCP_PFP_UCODE_ADDR, 0); /* CE */ fw_data = (const __le32 *) (adev->gfx.ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; - WREG32(CP_CE_UCODE_ADDR, 0); + WREG32(mmCP_CE_UCODE_ADDR, 0); for (i = 0; i < fw_size; i++) - WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); - WREG32(CP_CE_UCODE_ADDR, 0); + WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); + WREG32(mmCP_CE_UCODE_ADDR, 0); /* ME */ fw_data = (const __be32 *) (adev->gfx.me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; - WREG32(CP_ME_RAM_WADDR, 0); + WREG32(mmCP_ME_RAM_WADDR, 0); for (i = 0; i < fw_size; i++) - WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++)); - WREG32(CP_ME_RAM_WADDR, 0); - + WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); + WREG32(mmCP_ME_RAM_WADDR, 0); - WREG32(CP_PFP_UCODE_ADDR, 0); - WREG32(CP_CE_UCODE_ADDR, 0); - WREG32(CP_ME_RAM_WADDR, 0); - WREG32(CP_ME_RAM_RADDR, 0); + WREG32(mmCP_PFP_UCODE_ADDR, 0); + WREG32(mmCP_CE_UCODE_ADDR, 0); + WREG32(mmCP_ME_RAM_WADDR, 0); + WREG32(mmCP_ME_RAM_RADDR, 0); return 0; } @@ -1720,14 +1757,14 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev) int r; u64 rptr_addr; - WREG32(CP_SEM_WAIT_TIMER, 0x0); - WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0); + WREG32(mmCP_SEM_WAIT_TIMER, 0x0); + WREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL, 0x0); /* Set the write pointer delay */ - WREG32(CP_RB_WPTR_DELAY, 0); + WREG32(mmCP_RB_WPTR_DELAY, 0); - WREG32(CP_DEBUG, 0); - WREG32(SCRATCH_ADDR, 0); + WREG32(mmCP_DEBUG, 0); + WREG32(mmSCRATCH_ADDR, 0); /* ring 0 - compute and gfx */ /* Set ring buffer size */ @@ -1738,24 +1775,24 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev) #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; #endif - WREG32(CP_RB0_CNTL, tmp); + WREG32(mmCP_RB0_CNTL, tmp); /* Initialize the ring buffer's read and write pointers */ - WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA); + WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); ring->wptr = 0; - WREG32(CP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, ring->wptr); /* set the wb address whether it's enabled or not */ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - WREG32(CP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); - WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); + WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); + WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); - WREG32(SCRATCH_UMSK, 0); + WREG32(mmSCRATCH_UMSK, 0); mdelay(1); - WREG32(CP_RB0_CNTL, tmp); + WREG32(mmCP_RB0_CNTL, tmp); - WREG32(CP_RB0_BASE, ring->gpu_addr >> 8); + WREG32(mmCP_RB0_BASE, ring->gpu_addr >> 8); /* start the rings */ gfx_v6_0_cp_gfx_start(adev); @@ -1779,11 +1816,11 @@ static u32 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->gfx.gfx_ring[0]) - return RREG32(CP_RB0_WPTR); + return RREG32(mmCP_RB0_WPTR); else if (ring == &adev->gfx.compute_ring[0]) - return RREG32(CP_RB1_WPTR); + return RREG32(mmCP_RB1_WPTR); else if (ring == &adev->gfx.compute_ring[1]) - return RREG32(CP_RB2_WPTR); + return RREG32(mmCP_RB2_WPTR); else BUG(); } @@ -1792,8 +1829,8 @@ static void gfx_v6_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(CP_RB0_WPTR, ring->wptr); - (void)RREG32(CP_RB0_WPTR); + WREG32(mmCP_RB0_WPTR, ring->wptr); + (void)RREG32(mmCP_RB0_WPTR); } static void gfx_v6_0_ring_set_wptr_compute(struct amdgpu_ring *ring) @@ -1801,11 +1838,11 @@ static void gfx_v6_0_ring_set_wptr_compute(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->gfx.compute_ring[0]) { - WREG32(CP_RB1_WPTR, ring->wptr); - (void)RREG32(CP_RB1_WPTR); + WREG32(mmCP_RB1_WPTR, ring->wptr); + (void)RREG32(mmCP_RB1_WPTR); } else if (ring == &adev->gfx.compute_ring[1]) { - WREG32(CP_RB2_WPTR, ring->wptr); - (void)RREG32(CP_RB2_WPTR); + WREG32(mmCP_RB2_WPTR, ring->wptr); + (void)RREG32(mmCP_RB2_WPTR); } else { BUG(); } @@ -1817,7 +1854,7 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev) struct amdgpu_ring *ring; u32 tmp; u32 rb_bufsz; - int r; + int i, r; u64 rptr_addr; /* ring1 - compute only */ @@ -1829,19 +1866,19 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev) #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; #endif - WREG32(CP_RB1_CNTL, tmp); + WREG32(mmCP_RB1_CNTL, tmp); - WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA); + WREG32(mmCP_RB1_CNTL, tmp | CP_RB1_CNTL__RB_RPTR_WR_ENA_MASK); ring->wptr = 0; - WREG32(CP_RB1_WPTR, ring->wptr); + WREG32(mmCP_RB1_WPTR, ring->wptr); rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - WREG32(CP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); - WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); + WREG32(mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); + WREG32(mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); mdelay(1); - WREG32(CP_RB1_CNTL, tmp); - WREG32(CP_RB1_BASE, ring->gpu_addr >> 8); + WREG32(mmCP_RB1_CNTL, tmp); + WREG32(mmCP_RB1_BASE, ring->gpu_addr >> 8); ring = &adev->gfx.compute_ring[1]; rb_bufsz = order_base_2(ring->ring_size / 8); @@ -1849,32 +1886,27 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev) #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; #endif - WREG32(CP_RB2_CNTL, tmp); + WREG32(mmCP_RB2_CNTL, tmp); - WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA); + WREG32(mmCP_RB2_CNTL, tmp | CP_RB2_CNTL__RB_RPTR_WR_ENA_MASK); ring->wptr = 0; - WREG32(CP_RB2_WPTR, ring->wptr); + WREG32(mmCP_RB2_WPTR, ring->wptr); rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - WREG32(CP_RB2_RPTR_ADDR, lower_32_bits(rptr_addr)); - WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); + WREG32(mmCP_RB2_RPTR_ADDR, lower_32_bits(rptr_addr)); + WREG32(mmCP_RB2_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); mdelay(1); - WREG32(CP_RB2_CNTL, tmp); - WREG32(CP_RB2_BASE, ring->gpu_addr >> 8); + WREG32(mmCP_RB2_CNTL, tmp); + WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8); - adev->gfx.compute_ring[0].ready = true; - adev->gfx.compute_ring[1].ready = true; + adev->gfx.compute_ring[0].ready = false; + adev->gfx.compute_ring[1].ready = false; - r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[0]); - if (r) { - adev->gfx.compute_ring[0].ready = false; - return r; - } - - r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[1]); - if (r) { - adev->gfx.compute_ring[1].ready = false; - return r; + for (i = 0; i < 2; i++) { + r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[i]); + if (r) + return r; + adev->gfx.compute_ring[i].ready = true; } return 0; @@ -1892,24 +1924,26 @@ static int gfx_v6_0_cp_load_microcode(struct amdgpu_device *adev) static void gfx_v6_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) -{ - u32 tmp = RREG32(CP_INT_CNTL_RING0); +{ + u32 tmp = RREG32(mmCP_INT_CNTL_RING0); u32 mask; int i; if (enable) - tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + tmp |= (CP_INT_CNTL__CNTX_BUSY_INT_ENABLE_MASK | + CP_INT_CNTL__CNTX_EMPTY_INT_ENABLE_MASK); else - tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); - WREG32(CP_INT_CNTL_RING0, tmp); + tmp &= ~(CP_INT_CNTL__CNTX_BUSY_INT_ENABLE_MASK | + CP_INT_CNTL__CNTX_EMPTY_INT_ENABLE_MASK); + WREG32(mmCP_INT_CNTL_RING0, tmp); if (!enable) { /* read a gfx register */ - tmp = RREG32(DB_DEPTH_INFO); + tmp = RREG32(mmDB_DEPTH_INFO); mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS; for (i = 0; i < adev->usec_timeout; i++) { - if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS)) + if ((RREG32(mmRLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS)) break; udelay(1); } @@ -1973,9 +2007,9 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0))); if (vm_id < 8) { - amdgpu_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id )); + amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id )); } else { - amdgpu_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8))); + amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8))); } amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, pd_addr >> 12); @@ -1984,7 +2018,7 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST); + amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 1 << vm_id); @@ -1992,7 +2026,7 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); amdgpu_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) | /* always */ WAIT_REG_MEM_ENGINE(0))); /* me */ - amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST); + amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0); /* ref */ amdgpu_ring_write(ring, 0); /* mask */ @@ -2071,7 +2105,6 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) if (src_ptr) { /* save restore block */ if (adev->gfx.rlc.save_restore_obj == NULL) { - r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, @@ -2166,20 +2199,12 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) static void gfx_v6_0_enable_lbpw(struct amdgpu_device *adev, bool enable) { - u32 tmp; - - tmp = RREG32(RLC_LB_CNTL); - if (enable) - tmp |= LOAD_BALANCE_ENABLE; - else - tmp &= ~LOAD_BALANCE_ENABLE; - WREG32(RLC_LB_CNTL, tmp); + WREG32_FIELD(RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); if (!enable) { gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - WREG32(SPI_LB_CU_MASK, 0x00ff); + WREG32(mmSPI_LB_CU_MASK, 0x00ff); } - } static void gfx_v6_0_wait_for_rlc_serdes(struct amdgpu_device *adev) @@ -2187,13 +2212,13 @@ static void gfx_v6_0_wait_for_rlc_serdes(struct amdgpu_device *adev) int i; for (i = 0; i < adev->usec_timeout; i++) { - if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0) + if (RREG32(mmRLC_SERDES_MASTER_BUSY_0) == 0) break; udelay(1); } for (i = 0; i < adev->usec_timeout; i++) { - if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0) + if (RREG32(mmRLC_SERDES_MASTER_BUSY_1) == 0) break; udelay(1); } @@ -2203,20 +2228,20 @@ static void gfx_v6_0_update_rlc(struct amdgpu_device *adev, u32 rlc) { u32 tmp; - tmp = RREG32(RLC_CNTL); + tmp = RREG32(mmRLC_CNTL); if (tmp != rlc) - WREG32(RLC_CNTL, rlc); + WREG32(mmRLC_CNTL, rlc); } static u32 gfx_v6_0_halt_rlc(struct amdgpu_device *adev) { u32 data, orig; - orig = data = RREG32(RLC_CNTL); + orig = data = RREG32(mmRLC_CNTL); - if (data & RLC_ENABLE) { - data &= ~RLC_ENABLE; - WREG32(RLC_CNTL, data); + if (data & RLC_CNTL__RLC_ENABLE_F32_MASK) { + data &= ~RLC_CNTL__RLC_ENABLE_F32_MASK; + WREG32(mmRLC_CNTL, data); gfx_v6_0_wait_for_rlc_serdes(adev); } @@ -2226,7 +2251,7 @@ static u32 gfx_v6_0_halt_rlc(struct amdgpu_device *adev) static void gfx_v6_0_rlc_stop(struct amdgpu_device *adev) { - WREG32(RLC_CNTL, 0); + WREG32(mmRLC_CNTL, 0); gfx_v6_0_enable_gui_idle_interrupt(adev, false); gfx_v6_0_wait_for_rlc_serdes(adev); @@ -2234,7 +2259,7 @@ static void gfx_v6_0_rlc_stop(struct amdgpu_device *adev) static void gfx_v6_0_rlc_start(struct amdgpu_device *adev) { - WREG32(RLC_CNTL, RLC_ENABLE); + WREG32(mmRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK); gfx_v6_0_enable_gui_idle_interrupt(adev, true); @@ -2243,13 +2268,9 @@ static void gfx_v6_0_rlc_start(struct amdgpu_device *adev) static void gfx_v6_0_rlc_reset(struct amdgpu_device *adev) { - u32 tmp = RREG32(GRBM_SOFT_RESET); - - tmp |= SOFT_RESET_RLC; - WREG32(GRBM_SOFT_RESET, tmp); + WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); udelay(50); - tmp &= ~SOFT_RESET_RLC; - WREG32(GRBM_SOFT_RESET, tmp); + WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); udelay(50); } @@ -2258,11 +2279,12 @@ static bool gfx_v6_0_lbpw_supported(struct amdgpu_device *adev) u32 tmp; /* Enable LBPW only for DDR3 */ - tmp = RREG32(MC_SEQ_MISC0); + tmp = RREG32(mmMC_SEQ_MISC0); if ((tmp & 0xF0000000) == 0xB0000000) return true; return false; } + static void gfx_v6_0_init_cg(struct amdgpu_device *adev) { } @@ -2283,15 +2305,15 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev) gfx_v6_0_init_pg(adev); gfx_v6_0_init_cg(adev); - WREG32(RLC_RL_BASE, 0); - WREG32(RLC_RL_SIZE, 0); - WREG32(RLC_LB_CNTL, 0); - WREG32(RLC_LB_CNTR_MAX, 0xffffffff); - WREG32(RLC_LB_CNTR_INIT, 0); - WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff); + WREG32(mmRLC_RL_BASE, 0); + WREG32(mmRLC_RL_SIZE, 0); + WREG32(mmRLC_LB_CNTL, 0); + WREG32(mmRLC_LB_CNTR_MAX, 0xffffffff); + WREG32(mmRLC_LB_CNTR_INIT, 0); + WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff); - WREG32(RLC_MC_CNTL, 0); - WREG32(RLC_UCODE_CNTL, 0); + WREG32(mmRLC_MC_CNTL, 0); + WREG32(mmRLC_UCODE_CNTL, 0); hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data; fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; @@ -2301,10 +2323,10 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev) amdgpu_ucode_print_rlc_hdr(&hdr->header); for (i = 0; i < fw_size; i++) { - WREG32(RLC_UCODE_ADDR, i); - WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++)); + WREG32(mmRLC_UCODE_ADDR, i); + WREG32(mmRLC_UCODE_DATA, le32_to_cpup(fw_data++)); } - WREG32(RLC_UCODE_ADDR, 0); + WREG32(mmRLC_UCODE_ADDR, 0); gfx_v6_0_enable_lbpw(adev, gfx_v6_0_lbpw_supported(adev)); gfx_v6_0_rlc_start(adev); @@ -2316,38 +2338,38 @@ static void gfx_v6_0_enable_cgcg(struct amdgpu_device *adev, bool enable) { u32 data, orig, tmp; - orig = data = RREG32(RLC_CGCG_CGLS_CTRL); + orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { gfx_v6_0_enable_gui_idle_interrupt(adev, true); - WREG32(RLC_GCPM_GENERAL_3, 0x00000080); + WREG32(mmRLC_GCPM_GENERAL_3, 0x00000080); tmp = gfx_v6_0_halt_rlc(adev); - WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff); - WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff); - WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff); + WREG32(mmRLC_SERDES_WR_MASTER_MASK_0, 0xffffffff); + WREG32(mmRLC_SERDES_WR_MASTER_MASK_1, 0xffffffff); + WREG32(mmRLC_SERDES_WR_CTRL, 0x00b000ff); gfx_v6_0_wait_for_rlc_serdes(adev); gfx_v6_0_update_rlc(adev, tmp); - WREG32(RLC_SERDES_WR_CTRL, 0x007000ff); + WREG32(mmRLC_SERDES_WR_CTRL, 0x007000ff); - data |= CGCG_EN | CGLS_EN; + data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; } else { gfx_v6_0_enable_gui_idle_interrupt(adev, false); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); - data &= ~(CGCG_EN | CGLS_EN); + data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); } if (orig != data) - WREG32(RLC_CGCG_CGLS_CTRL, data); + WREG32(mmRLC_CGCG_CGLS_CTRL, data); } @@ -2357,51 +2379,51 @@ static void gfx_v6_0_enable_mgcg(struct amdgpu_device *adev, bool enable) u32 data, orig, tmp = 0; if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { - orig = data = RREG32(CGTS_SM_CTRL_REG); + orig = data = RREG32(mmCGTS_SM_CTRL_REG); data = 0x96940200; if (orig != data) - WREG32(CGTS_SM_CTRL_REG, data); + WREG32(mmCGTS_SM_CTRL_REG, data); if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { - orig = data = RREG32(CP_MEM_SLP_CNTL); - data |= CP_MEM_LS_EN; + orig = data = RREG32(mmCP_MEM_SLP_CNTL); + data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; if (orig != data) - WREG32(CP_MEM_SLP_CNTL, data); + WREG32(mmCP_MEM_SLP_CNTL, data); } - orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); + orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); data &= 0xffffffc0; if (orig != data) - WREG32(RLC_CGTT_MGCG_OVERRIDE, data); + WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); tmp = gfx_v6_0_halt_rlc(adev); - WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff); - WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff); - WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff); + WREG32(mmRLC_SERDES_WR_MASTER_MASK_0, 0xffffffff); + WREG32(mmRLC_SERDES_WR_MASTER_MASK_1, 0xffffffff); + WREG32(mmRLC_SERDES_WR_CTRL, 0x00d000ff); gfx_v6_0_update_rlc(adev, tmp); } else { - orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); + orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); data |= 0x00000003; if (orig != data) - WREG32(RLC_CGTT_MGCG_OVERRIDE, data); + WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); - data = RREG32(CP_MEM_SLP_CNTL); - if (data & CP_MEM_LS_EN) { - data &= ~CP_MEM_LS_EN; - WREG32(CP_MEM_SLP_CNTL, data); + data = RREG32(mmCP_MEM_SLP_CNTL); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { + data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + WREG32(mmCP_MEM_SLP_CNTL, data); } - orig = data = RREG32(CGTS_SM_CTRL_REG); - data |= LS_OVERRIDE | OVERRIDE; + orig = data = RREG32(mmCGTS_SM_CTRL_REG); + data |= CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK | CGTS_SM_CTRL_REG__OVERRIDE_MASK; if (orig != data) - WREG32(CGTS_SM_CTRL_REG, data); + WREG32(mmCGTS_SM_CTRL_REG, data); tmp = gfx_v6_0_halt_rlc(adev); - WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff); - WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff); - WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff); + WREG32(mmRLC_SERDES_WR_MASTER_MASK_0, 0xffffffff); + WREG32(mmRLC_SERDES_WR_MASTER_MASK_1, 0xffffffff); + WREG32(mmRLC_SERDES_WR_CTRL, 0x00e000ff); gfx_v6_0_update_rlc(adev, tmp); } @@ -2421,6 +2443,7 @@ static void gfx_v6_0_update_cg(struct amdgpu_device *adev, gfx_v6_0_enable_gui_idle_interrupt(adev, true); } */ + static void gfx_v6_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev, bool enable) { @@ -2435,13 +2458,13 @@ static void gfx_v6_0_enable_cp_pg(struct amdgpu_device *adev, bool enable) { u32 data, orig; - orig = data = RREG32(RLC_PG_CNTL); + orig = data = RREG32(mmRLC_PG_CNTL); if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP)) data &= ~0x8000; else data |= 0x8000; if (orig != data) - WREG32(RLC_PG_CNTL, data); + WREG32(mmRLC_PG_CNTL, data); } static void gfx_v6_0_enable_gds_pg(struct amdgpu_device *adev, bool enable) @@ -2518,26 +2541,13 @@ static void gfx_v6_0_init_cp_pg_table(struct amdgpu_device *adev) static void gfx_v6_0_enable_gfx_cgpg(struct amdgpu_device *adev, bool enable) { - - u32 tmp; - if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { - tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10); - WREG32(RLC_TTOP_D, tmp); - - tmp = RREG32(RLC_PG_CNTL); - tmp |= GFX_PG_ENABLE; - WREG32(RLC_PG_CNTL, tmp); - - tmp = RREG32(RLC_AUTO_PG_CTRL); - tmp |= AUTO_PG_EN; - WREG32(RLC_AUTO_PG_CTRL, tmp); + WREG32(mmRLC_TTOP_D, RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10)); + WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, 1); + WREG32_FIELD(RLC_AUTO_PG_CTRL, AUTO_PG_EN, 1); } else { - tmp = RREG32(RLC_AUTO_PG_CTRL); - tmp &= ~AUTO_PG_EN; - WREG32(RLC_AUTO_PG_CTRL, tmp); - - tmp = RREG32(DB_RENDER_CONTROL); + WREG32_FIELD(RLC_AUTO_PG_CTRL, AUTO_PG_EN, 0); + (void)RREG32(mmDB_RENDER_CONTROL); } } @@ -2550,8 +2560,8 @@ static u32 gfx_v6_0_get_cu_active_bitmap(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); gfx_v6_0_select_se_sh(adev, se, sh, 0xffffffff); - tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG); - tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG); + tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); + tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); @@ -2594,12 +2604,8 @@ static void gfx_v6_0_init_ao_cu_mask(struct amdgpu_device *adev) } } - WREG32(RLC_PG_AO_CU_MASK, tmp); - - tmp = RREG32(RLC_MAX_PG_CU); - tmp &= ~MAX_PU_CU_MASK; - tmp |= MAX_PU_CU(active_cu_number); - WREG32(RLC_MAX_PG_CU, tmp); + WREG32(mmRLC_PG_AO_CU_MASK, tmp); + WREG32_FIELD(RLC_MAX_PG_CU, MAX_POWERED_UP_CU, active_cu_number); } static void gfx_v6_0_enable_gfx_static_mgpg(struct amdgpu_device *adev, @@ -2607,13 +2613,13 @@ static void gfx_v6_0_enable_gfx_static_mgpg(struct amdgpu_device *adev, { u32 data, orig; - orig = data = RREG32(RLC_PG_CNTL); + orig = data = RREG32(mmRLC_PG_CNTL); if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)) - data |= STATIC_PER_CU_PG_ENABLE; + data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; else - data &= ~STATIC_PER_CU_PG_ENABLE; + data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; if (orig != data) - WREG32(RLC_PG_CNTL, data); + WREG32(mmRLC_PG_CNTL, data); } static void gfx_v6_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev, @@ -2621,33 +2627,28 @@ static void gfx_v6_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev, { u32 data, orig; - orig = data = RREG32(RLC_PG_CNTL); + orig = data = RREG32(mmRLC_PG_CNTL); if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)) - data |= DYN_PER_CU_PG_ENABLE; + data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; else - data &= ~DYN_PER_CU_PG_ENABLE; + data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; if (orig != data) - WREG32(RLC_PG_CNTL, data); + WREG32(mmRLC_PG_CNTL, data); } static void gfx_v6_0_init_gfx_cgpg(struct amdgpu_device *adev) { u32 tmp; - WREG32(RLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8); - - tmp = RREG32(RLC_PG_CNTL); - tmp |= GFX_PG_SRC; - WREG32(RLC_PG_CNTL, tmp); - - WREG32(RLC_CLEAR_STATE_RESTORE_BASE, adev->gfx.rlc.clear_state_gpu_addr >> 8); + WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8); + WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_SRC, 1); + WREG32(mmRLC_CLEAR_STATE_RESTORE_BASE, adev->gfx.rlc.clear_state_gpu_addr >> 8); - tmp = RREG32(RLC_AUTO_PG_CTRL); - - tmp &= ~GRBM_REG_SGIT_MASK; - tmp |= GRBM_REG_SGIT(0x700); - tmp &= ~PG_AFTER_GRBM_REG_ST_MASK; - WREG32(RLC_AUTO_PG_CTRL, tmp); + tmp = RREG32(mmRLC_AUTO_PG_CTRL); + tmp &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; + tmp |= (0x700 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); + tmp &= ~RLC_AUTO_PG_CTRL__PG_AFTER_GRBM_REG_SAVE_THRESHOLD_MASK; + WREG32(mmRLC_AUTO_PG_CTRL, tmp); } static void gfx_v6_0_update_gfx_pg(struct amdgpu_device *adev, bool enable) @@ -2703,7 +2704,6 @@ static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); buffer[count++] = cpu_to_le32(0x80000000); buffer[count++] = cpu_to_le32(0x80000000); @@ -2723,7 +2723,7 @@ static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, } buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); + buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); switch (adev->asic_type) { case CHIP_TAHITI: @@ -2766,16 +2766,16 @@ static void gfx_v6_0_init_pg(struct amdgpu_device *adev) gfx_v6_0_enable_cp_pg(adev, true); gfx_v6_0_enable_gds_pg(adev, true); } else { - WREG32(RLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8); - WREG32(RLC_CLEAR_STATE_RESTORE_BASE, adev->gfx.rlc.clear_state_gpu_addr >> 8); + WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8); + WREG32(mmRLC_CLEAR_STATE_RESTORE_BASE, adev->gfx.rlc.clear_state_gpu_addr >> 8); } gfx_v6_0_init_ao_cu_mask(adev); gfx_v6_0_update_gfx_pg(adev, true); } else { - WREG32(RLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8); - WREG32(RLC_CLEAR_STATE_RESTORE_BASE, adev->gfx.rlc.clear_state_gpu_addr >> 8); + WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8); + WREG32(mmRLC_CLEAR_STATE_RESTORE_BASE, adev->gfx.rlc.clear_state_gpu_addr >> 8); } } @@ -2800,23 +2800,61 @@ static uint64_t gfx_v6_0_get_gpu_clock_counter(struct amdgpu_device *adev) uint64_t clock; mutex_lock(&adev->gfx.gpu_clock_mutex); - WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1); - clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) | - ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); mutex_unlock(&adev->gfx.gpu_clock_mutex); return clock; } static void gfx_v6_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) { + if (flags & AMDGPU_HAVE_CTX_SWITCH) + gfx_v6_0_ring_emit_vgt_flush(ring); amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); amdgpu_ring_write(ring, 0x80000000); amdgpu_ring_write(ring, 0); } + +static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) +{ + WREG32(mmSQ_IND_INDEX, + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | + (address << SQ_IND_INDEX__INDEX__SHIFT) | + (SQ_IND_INDEX__FORCE_READ_MASK)); + return RREG32(mmSQ_IND_DATA); +} + +static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +{ + /* type 0 wave data */ + dst[(*no_fields)++] = 0; + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); +} + static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = { .get_gpu_clock_counter = &gfx_v6_0_get_gpu_clock_counter, .select_se_sh = &gfx_v6_0_select_se_sh, + .read_wave_data = &gfx_v6_0_read_wave_data, }; static int gfx_v6_0_early_init(void *handle) @@ -2967,7 +3005,7 @@ static bool gfx_v6_0_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (RREG32(GRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK) + if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK) return false; else return true; @@ -2998,14 +3036,14 @@ static void gfx_v6_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - cp_int_cntl = RREG32(CP_INT_CNTL_RING0); - cp_int_cntl &= ~CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK; - WREG32(CP_INT_CNTL_RING0, cp_int_cntl); + cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); + cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - cp_int_cntl = RREG32(CP_INT_CNTL_RING0); - cp_int_cntl |= CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK; - WREG32(CP_INT_CNTL_RING0, cp_int_cntl); + cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); + cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); break; default: break; @@ -3020,27 +3058,27 @@ static void gfx_v6_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, switch (state){ case AMDGPU_IRQ_STATE_DISABLE: if (ring == 0) { - cp_int_cntl = RREG32(CP_INT_CNTL_RING1); - cp_int_cntl &= ~CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK; - WREG32(CP_INT_CNTL_RING1, cp_int_cntl); + cp_int_cntl = RREG32(mmCP_INT_CNTL_RING1); + cp_int_cntl &= ~CP_INT_CNTL_RING1__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mmCP_INT_CNTL_RING1, cp_int_cntl); break; } else { - cp_int_cntl = RREG32(CP_INT_CNTL_RING2); - cp_int_cntl &= ~CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK; - WREG32(CP_INT_CNTL_RING2, cp_int_cntl); + cp_int_cntl = RREG32(mmCP_INT_CNTL_RING2); + cp_int_cntl &= ~CP_INT_CNTL_RING2__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mmCP_INT_CNTL_RING2, cp_int_cntl); break; } case AMDGPU_IRQ_STATE_ENABLE: if (ring == 0) { - cp_int_cntl = RREG32(CP_INT_CNTL_RING1); - cp_int_cntl |= CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK; - WREG32(CP_INT_CNTL_RING1, cp_int_cntl); + cp_int_cntl = RREG32(mmCP_INT_CNTL_RING1); + cp_int_cntl |= CP_INT_CNTL_RING1__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mmCP_INT_CNTL_RING1, cp_int_cntl); break; } else { - cp_int_cntl = RREG32(CP_INT_CNTL_RING2); - cp_int_cntl |= CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK; - WREG32(CP_INT_CNTL_RING2, cp_int_cntl); + cp_int_cntl = RREG32(mmCP_INT_CNTL_RING2); + cp_int_cntl |= CP_INT_CNTL_RING2__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mmCP_INT_CNTL_RING2, cp_int_cntl); break; } @@ -3061,14 +3099,14 @@ static int gfx_v6_0_set_priv_reg_fault_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - cp_int_cntl = RREG32(CP_INT_CNTL_RING0); + cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK; - WREG32(CP_INT_CNTL_RING0, cp_int_cntl); + WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - cp_int_cntl = RREG32(CP_INT_CNTL_RING0); + cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK; - WREG32(CP_INT_CNTL_RING0, cp_int_cntl); + WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); break; default: break; @@ -3086,14 +3124,14 @@ static int gfx_v6_0_set_priv_inst_fault_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - cp_int_cntl = RREG32(CP_INT_CNTL_RING0); + cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK; - WREG32(CP_INT_CNTL_RING0, cp_int_cntl); + WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - cp_int_cntl = RREG32(CP_INT_CNTL_RING0); + cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK; - WREG32(CP_INT_CNTL_RING0, cp_int_cntl); + WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); break; default: break; @@ -3133,7 +3171,7 @@ static int gfx_v6_0_eop_irq(struct amdgpu_device *adev, break; case 1: case 2: - amdgpu_fence_process(&adev->gfx.compute_ring[entry->ring_id -1]); + amdgpu_fence_process(&adev->gfx.compute_ring[entry->ring_id - 1]); break; default: break; @@ -3236,7 +3274,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = { 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ 7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */ 17 + 6 + /* gfx_v6_0_ring_emit_vm_flush */ - 3, /* gfx_v6_ring_emit_cntxcntl */ + 3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */ .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */ .emit_ib = gfx_v6_0_ring_emit_ib, .emit_fence = gfx_v6_0_ring_emit_fence, |