diff options
| author | Dave Airlie <airlied@redhat.com> | 2017-08-21 10:49:48 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2017-08-21 10:49:48 +1000 |
| commit | a0aeb3b2ac3ff0608f1bc3fb46332148856d3276 (patch) | |
| tree | 26ec19e95ddfc02fef15821b86c9928102ec0d07 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | |
| parent | 5fd27c2a1f75439ae72212e730c3bd73868f487c (diff) | |
| parent | 5d71dbc3a588690c3d66d76db8cd29973425ce6d (diff) | |
| download | talos-op-linux-a0aeb3b2ac3ff0608f1bc3fb46332148856d3276.tar.gz talos-op-linux-a0aeb3b2ac3ff0608f1bc3fb46332148856d3276.zip | |
Merge tag 'drm-amdkfd-next-2017-08-18' of git://people.freedesktop.org/~gabbayo/linux into drm-next
This is the amdkfd pull request for 4.14 merge window.
AMD has started cleaning the pipe and sending patches from their internal
development to the upstream community.
The plan as I understand it is to first get all the non-dGPU patches to
upstream and then move to upstream dGPU support.
The patches here are relevant only for Kaveri and Carrizo.
The following is a summary of the changes:
- Add new IOCTL to set a Scratch memory VA
- Update PM4 headers for new firmware that support scratch memory
- Support image tiling mode
- Remove all uses of BUG_ON
- Various Bug fixes and coding style fixes
* tag 'drm-amdkfd-next-2017-08-18' of git://people.freedesktop.org/~gabbayo/linux: (24 commits)
drm/amdkfd: Implement image tiling mode support v2
drm/amdgpu: Add kgd kfd interface get_tile_config() v2
drm/amdkfd: Adding new IOCTL for scratch memory v2
drm/amdgpu: Add kgd/kfd interface to support scratch memory v2
drm/amdgpu: Program SH_STATIC_MEM_CONFIG globally, not per-VMID
drm/amd: Update MEC HQD loading code for KFD
drm/amdgpu: Disable GFX PG on CZ
drm/amdkfd: Update PM4 packet headers
drm/amdkfd: Clamp EOP queue size correctly on Gfx8
drm/amdkfd: Add more error printing to help bringup v2
drm/amdkfd: Handle remaining BUG_ONs more gracefully v2
drm/amdkfd: Allocate gtt_sa_bitmap in long units
drm/amdkfd: Fix doorbell initialization and finalization
drm/amdkfd: Remove BUG_ONs for NULL pointer arguments
drm/amdkfd: Remove usage of alloc(sizeof(struct...
drm/amdkfd: Fix goto usage v2
drm/amdkfd: Change x==NULL/false references to !x
drm/amdkfd: Consolidate and clean up log commands
drm/amdkfd: Clean up KFD style errors and warnings v2
drm/amdgpu: Remove hard-coded assumptions about compute pipes
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 189 |
1 files changed, 162 insertions, 27 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 5254562fd0f9..b9dbbf9cb8b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -39,6 +39,12 @@ #include "gmc/gmc_7_1_sh_mask.h" #include "cik_structs.h" +enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, + RESET_WAVES +}; + enum { MAX_TRAPID = 8, /* 3 bits in the bitfield. */ MAX_WATCH_ADDRESSES = 4 @@ -96,12 +102,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr); + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id); static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); @@ -126,6 +135,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); + +/* Because of REG_GET_FIELD() being used, we put this function in the + * asic specific file. + */ +static int get_tile_config(struct kgd_dev *kgd, + struct tile_config *config) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + config->gb_addr_config = adev->gfx.config.gb_addr_config; + config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFBANK); + config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFRANKS); + + config->tile_config_ptr = adev->gfx.config.tile_mode_array; + config->num_tile_configs = + ARRAY_SIZE(adev->gfx.config.tile_mode_array); + config->macro_tile_config_ptr = + adev->gfx.config.macrotile_mode_array; + config->num_macro_tile_configs = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + + return 0; +} static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, @@ -150,7 +186,9 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, .write_vmid_invalidate_request = write_vmid_invalidate_request, - .get_fw_version = get_fw_version + .get_fw_version = get_fw_version, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = get_tile_config, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -186,7 +224,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, queue_id, 0); @@ -290,20 +328,38 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) } static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr) + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t wptr_shadow, is_wptr_shadow_valid; struct cik_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, wptr_val, data; m = get_mqd(mqd); - is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); - if (is_wptr_shadow_valid) - m->cp_hqd_pq_wptr = wptr_shadow; - acquire_queue(kgd, pipe_id, queue_id); - gfx_v7_0_mqd_commit(adev, m); + + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */ + mqd_hqd = &m->cp_mqd_base_addr_lo; + + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); + + /* Copy userspace write pointer value to register. + * Activate doorbell logic to monitor subsequent changes. + */ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); + + if (read_user_wptr(mm, wptr, wptr_val)) + WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32(mmCP_HQD_ACTIVE, data); + release_queue(kgd); return 0; @@ -382,30 +438,99 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t temp; - int timeout = utimeout; + enum hqd_dequeue_request_type type; + unsigned long flags, end_jiffies; + int retry; acquire_queue(kgd, pipe_id, queue_id); WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); - WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); + switch (reset_type) { + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: + type = DRAIN_PIPE; + break; + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: + type = RESET_WAVES; + break; + default: + type = DRAIN_PIPE; + break; + } + + /* Workaround: If IQ timer is active and the wait time is close to or + * equal to 0, dequeueing is not safe. Wait until either the wait time + * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is + * cleared before continuing. Also, ensure wait times are set to at + * least 0x3. + */ + local_irq_save(flags); + preempt_disable(); + retry = 5000; /* wait for 500 usecs at maximum */ + while (true) { + temp = RREG32(mmCP_HQD_IQ_TIMER); + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { + pr_debug("HW is processing IQ\n"); + goto loop; + } + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) + == 3) /* SEM-rearm is safe */ + break; + /* Wait time 3 is safe for CP, but our MMIO read/write + * time is close to 1 microsecond, so check for 10 to + * leave more buffer room + */ + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) + >= 10) + break; + pr_debug("IQ timer is active\n"); + } else + break; +loop: + if (!retry) { + pr_err("CP HQD IQ timer status time out\n"); + break; + } + ndelay(100); + --retry; + } + retry = 1000; + while (true) { + temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); + if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) + break; + pr_debug("Dequeue request is pending\n"); + if (!retry) { + pr_err("CP HQD dequeue request time out\n"); + break; + } + ndelay(100); + --retry; + } + local_irq_restore(flags); + preempt_enable(); + + WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { temp = RREG32(mmCP_HQD_ACTIVE); - if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) break; - if (timeout <= 0) { - pr_err("kfd: cp queue preemption time out.\n"); + if (time_after(jiffies, end_jiffies)) { + pr_err("cp queue preemption time out\n"); release_queue(kgd); return -ETIME; } - msleep(20); - timeout -= 20; + usleep_range(500, 1000); } release_queue(kgd); @@ -556,6 +681,16 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); } +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + lock_srbm(kgd, 0, 0, 0, vmid); + WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); + unlock_srbm(kgd); +} + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; @@ -566,42 +701,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw->data; break; case KGD_ENGINE_ME: hdr = (const union amdgpu_firmware_header *) - adev->gfx.me_fw->data; + adev->gfx.me_fw->data; break; case KGD_ENGINE_CE: hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; + adev->gfx.ce_fw->data; break; case KGD_ENGINE_MEC1: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; + adev->gfx.mec_fw->data; break; case KGD_ENGINE_MEC2: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw->data; break; case KGD_ENGINE_RLC: hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw->data; break; case KGD_ENGINE_SDMA1: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[0].fw->data; + adev->sdma.instance[0].fw->data; break; case KGD_ENGINE_SDMA2: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; + adev->sdma.instance[1].fw->data; break; default: |

