diff options
Diffstat (limited to 'drivers/gpu')
204 files changed, 3369 insertions, 1230 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 58e9772c2586..bfdadc3667e0 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -180,6 +180,13 @@ config DRM_TTM GPU memory types. Will be enabled automatically if a device driver uses it. +config DRM_TTM_DMA_PAGE_POOL + bool + depends on DRM_TTM && (SWIOTLB || INTEL_IOMMU) + default y + help + Choose this if you need the TTM dma page pool + config DRM_VRAM_HELPER tristate depends on DRM diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0bb08e0e1611..bcc5d40a8d5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -977,6 +977,9 @@ struct amdgpu_device { uint64_t unique_id; uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; + + /* device pstate */ + int pstate; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index ae6f5446262c..12dbcfaa34b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -105,11 +105,24 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) (kfd_mem_limit.max_ttm_mem_limit >> 20)); } +/* Estimate page table size needed to represent a given memory size + * + * With 4KB pages, we need one 8 byte PTE for each 4KB of memory + * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB + * of memory (factor 256K, >> 18). ROCm user mode tries to optimize + * for 2MB pages for TLB efficiency. However, small allocations and + * fragmented system memory still need some 4KB pages. We choose a + * compromise that should work in most cases without reserving too + * much memory for page tables unnecessarily (factor 16K, >> 14). + */ +#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) + static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 domain, bool sg) { + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; - uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9; int ret = 0; acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 649e68c4479b..d1495e1c9289 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, { unsigned long start_jiffies; unsigned long end_jiffies; - struct dma_fence *fence = NULL; + struct dma_fence *fence; int i, r; start_jiffies = jiffies; @@ -44,16 +44,14 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, if (r) goto exit_do_move; r = dma_fence_wait(fence, false); + dma_fence_put(fence); if (r) goto exit_do_move; - dma_fence_put(fence); } end_jiffies = jiffies; r = jiffies_to_msecs(end_jiffies - start_jiffies); exit_do_move: - if (fence) - dma_fence_put(fence); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 6614d8a6f4c8..2cdaf3b2a721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -604,8 +604,11 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) continue; } - for (i = 0; i < num_entities; i++) + for (i = 0; i < num_entities; i++) { + mutex_lock(&ctx->adev->lock_reset); drm_sched_entity_fini(&ctx->entities[0][i].entity); + mutex_unlock(&ctx->adev->lock_reset); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 693f17e78791..8e6726e0d035 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -859,6 +859,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; int r = 0, i; + /* Avoid accidently unparking the sched thread during GPU reset */ + mutex_lock(&adev->lock_reset); + /* hold on the scheduler */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -884,6 +887,8 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) kthread_unpark(ring->sched.thread); } + mutex_unlock(&adev->lock_reset); + return 0; } @@ -1036,6 +1041,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) if (!fences) return -ENOMEM; + /* Avoid accidently unparking the sched thread during GPU reset */ + mutex_lock(&adev->lock_reset); + /* stop the scheduler */ kthread_park(ring->sched.thread); @@ -1075,6 +1083,8 @@ failure: /* restart the scheduler */ kthread_unpark(ring->sched.thread); + mutex_unlock(&adev->lock_reset); + ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); kfree(fences); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d36d2b093539..4f76beafb2fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2057,6 +2057,7 @@ out: */ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) { + struct amdgpu_gpu_instance *gpu_instance; int i = 0, r; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2082,8 +2083,39 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) if (r) DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); - /* set to low pstate by default */ - amdgpu_xgmi_set_pstate(adev, 0); + + if (adev->gmc.xgmi.num_physical_nodes > 1) { + mutex_lock(&mgpu_info.mutex); + + /* + * Reset device p-state to low as this was booted with high. + * + * This should be performed only after all devices from the same + * hive get initialized. + * + * However, it's unknown how many device in the hive in advance. + * As this is counted one by one during devices initializations. + * + * So, we wait for all XGMI interlinked devices initialized. + * This may bring some delays as those devices may come from + * different hives. But that should be OK. + */ + if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { + for (i = 0; i < mgpu_info.num_gpu; i++) { + gpu_instance = &(mgpu_info.gpu_ins[i]); + if (gpu_instance->adev->flags & AMD_IS_APU) + continue; + + r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0); + if (r) { + DRM_ERROR("pstate setting failed (%d).\n", r); + break; + } + } + } + + mutex_unlock(&mgpu_info.mutex); + } return 0; } @@ -3020,6 +3052,13 @@ fence_driver_init: DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); } + /* + * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. + * Otherwise the mgpu fan boost feature will be skipped due to the + * gpu instance is counted less. + */ + amdgpu_register_gpu_instance(adev); + /* enable clockgating, etc. after ib tests, etc. since some blocks require * explicit gating rather than handling it automatically. */ @@ -3070,7 +3109,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev) int r; DRM_INFO("amdgpu: finishing device.\n"); + flush_delayed_work(&adev->delayed_init_work); adev->shutdown = true; + /* disable all interrupts */ amdgpu_irq_disable_all(adev); if (adev->mode_info.mode_config_initialized){ @@ -3088,7 +3129,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->firmware.gpu_info_fw = NULL; } adev->accel_working = false; - cancel_delayed_work_sync(&adev->delayed_init_work); /* free i2c buses */ if (!amdgpu_device_has_dc_support(adev)) amdgpu_i2c_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index d2dd59a95e8a..3cadb0b76f22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -514,7 +514,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, * Also, don't allow GTT domain if the BO doens't have USWC falg set. */ if (adev->asic_type >= CHIP_CARRIZO && - adev->asic_type <= CHIP_RAVEN && + adev->asic_type < CHIP_RAVEN && (adev->flags & AMD_IS_APU) && (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && amdgpu_bo_support_uswc(bo_flags) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1d6ce018c8f2..0ffc9447b573 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -998,9 +998,10 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, /* Navi14 */ - {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c9d1fada6188..e00b46180d2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -454,8 +454,6 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) } ring = &adev->gfx.kiq.ring; - if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) - kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]); kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 459aa9059542..0ae0a2715b0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -225,7 +225,7 @@ struct amdgpu_me { uint32_t num_me; uint32_t num_pipe_per_me; uint32_t num_queue_per_pipe; - void *mqd_backup[AMDGPU_MAX_GFX_RINGS + 1]; + void *mqd_backup[AMDGPU_MAX_GFX_RINGS]; /* These are the resources for which amdgpu takes ownership */ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES); @@ -267,6 +267,7 @@ struct amdgpu_gfx { uint32_t mec2_feature_version; bool mec_fw_write_wait; bool me_fw_write_wait; + bool cp_fw_write_wait; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; unsigned num_gfx_rings; struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 555d8e57fae9..b499a3de8bb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -77,6 +77,7 @@ struct amdgpu_gmc_fault { struct amdgpu_vmhub { uint32_t ctx0_ptb_addr_lo32; uint32_t ctx0_ptb_addr_hi32; + uint32_t vm_inv_eng0_sem; uint32_t vm_inv_eng0_req; uint32_t vm_inv_eng0_ack; uint32_t vm_context0_cntl; @@ -127,18 +128,48 @@ struct amdgpu_xgmi { }; struct amdgpu_gmc { + /* FB's physical address in MMIO space (for CPU to + * map FB). This is different compared to the agp/ + * gart/vram_start/end field as the later is from + * GPU's view and aper_base is from CPU's view. + */ resource_size_t aper_size; resource_size_t aper_base; /* for some chips with <= 32MB we need to lie * about vram size near mc fb location */ u64 mc_vram_size; u64 visible_vram_size; + /* AGP aperture start and end in MC address space + * Driver find a hole in the MC address space + * to place AGP by setting MC_VM_AGP_BOT/TOP registers + * Under VMID0, logical address == MC address. AGP + * aperture maps to physical bus or IOVA addressed. + * AGP aperture is used to simulate FB in ZFB case. + * AGP aperture is also used for page table in system + * memory (mainly for APU). + * + */ u64 agp_size; u64 agp_start; u64 agp_end; + /* GART aperture start and end in MC address space + * Driver find a hole in the MC address space + * to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR + * registers + * Under VMID0, logical address inside GART aperture will + * be translated through gpuvm gart page table to access + * paged system memory + */ u64 gart_size; u64 gart_start; u64 gart_end; + /* Frame buffer aperture of this GPU device. Different from + * fb_start (see below), this only covers the local GPU device. + * Driver get fb_start from MC_VM_FB_LOCATION_BASE (set by vbios) + * and calculate vram_start of this local device by adding an + * offset inside the XGMI hive. + * Under VMID0, logical address == MC address + */ u64 vram_start; u64 vram_end; /* FB region , it's same as local vram region in single GPU, in XGMI diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 6f3b03f6224f..30d540d23b77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -311,7 +311,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) drm_irq_uninstall(adev->ddev); adev->irq.installed = false; if (adev->irq.msi_enabled) - pci_disable_msi(adev->pdev); + pci_free_irq_vectors(adev->pdev); if (!amdgpu_device_has_dc_support(adev)) flush_work(&adev->hotplug_work); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 137a8573d556..b6db28a570c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -190,7 +190,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) pm_runtime_put_autosuspend(dev->dev); } - amdgpu_register_gpu_instance(adev); out: if (r) { /* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */ @@ -656,15 +655,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file return -ENOMEM; alloc_size = info->read_mmr_reg.count * sizeof(*regs); - for (i = 0; i < info->read_mmr_reg.count; i++) + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < info->read_mmr_reg.count; i++) { if (amdgpu_asic_read_register(adev, se_num, sh_num, info->read_mmr_reg.dword_offset + i, ®s[i])) { DRM_DEBUG_KMS("unallowed offset %#x\n", info->read_mmr_reg.dword_offset + i); kfree(regs); + amdgpu_gfx_off_ctrl(adev, true); return -EFAULT; } + } + amdgpu_gfx_off_ctrl(adev, true); n = copy_to_user(out, regs, min(size, alloc_size)); kfree(regs); return n ? -EFAULT : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index bbe9ac7e843f..44be3a45b25e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -567,7 +567,9 @@ static int psp_xgmi_initialize(struct psp_context *psp) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - if (!psp->adev->psp.ta_fw) + if (!psp->adev->psp.ta_fw || + !psp->adev->psp.ta_xgmi_ucode_size || + !psp->adev->psp.ta_xgmi_start_addr) return -ENOENT; if (!psp->xgmi_context.initialized) { @@ -756,6 +758,12 @@ static int psp_ras_terminate(struct psp_context *psp) { int ret; + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + if (!psp->ras.ras_initialized) return 0; @@ -777,6 +785,18 @@ static int psp_ras_initialize(struct psp_context *psp) { int ret; + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_ras_ucode_size || + !psp->adev->psp.ta_ras_start_addr) { + dev_warn(psp->adev->dev, "RAS: ras ta ucode is not available\n"); + return 0; + } + if (!psp->ras.ras_initialized) { ret = psp_ras_init_shared_buf(psp); if (ret) @@ -866,6 +886,18 @@ static int psp_hdcp_initialize(struct psp_context *psp) { int ret; + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_hdcp_ucode_size || + !psp->adev->psp.ta_hdcp_start_addr) { + dev_warn(psp->adev->dev, "HDCP: hdcp ta ucode is not available\n"); + return 0; + } + if (!psp->hdcp_context.hdcp_initialized) { ret = psp_hdcp_init_shared_buf(psp); if (ret) @@ -948,6 +980,12 @@ static int psp_hdcp_terminate(struct psp_context *psp) { int ret; + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + if (!psp->hdcp_context.hdcp_initialized) return 0; @@ -1039,6 +1077,18 @@ static int psp_dtm_initialize(struct psp_context *psp) { int ret; + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_dtm_ucode_size || + !psp->adev->psp.ta_dtm_start_addr) { + dev_warn(psp->adev->dev, "DTM: dtm ta ucode is not available\n"); + return 0; + } + if (!psp->dtm_context.dtm_initialized) { ret = psp_dtm_init_shared_buf(psp); if (ret) @@ -1091,6 +1141,12 @@ static int psp_dtm_terminate(struct psp_context *psp) { int ret; + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + if (!psp->dtm_context.dtm_initialized) return 0; @@ -1411,7 +1467,10 @@ out: || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7 - || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) /*skip ucode loading in SRIOV VF */ continue; @@ -1428,8 +1487,8 @@ out: return ret; /* Start rlc autoload after psp recieved all the gfx firmware */ - if (psp->autoload_supported && ucode->ucode_id == - AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ? + AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) { ret = psp_rlc_autoload(psp); if (ret) { DRM_ERROR("Failed to start rlc autoload\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index dab90c280476..404483437bd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -220,7 +220,7 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, * As their names indicate, inject operation will write the * value to the address. * - * Second member: struct ras_debug_if::op. + * The second member: struct ras_debug_if::op. * It has three kinds of operations. * * - 0: disable RAS on the block. Take ::head as its data. @@ -228,14 +228,20 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, * - 2: inject errors on the block. Take ::inject as its data. * * How to use the interface? - * programs: - * copy the struct ras_debug_if in your codes and initialize it. - * write the struct to the control node. + * + * Programs + * + * Copy the struct ras_debug_if in your codes and initialize it. + * Write the struct to the control node. + * + * Shells * * .. code-block:: bash * * echo op block [error [sub_block address value]] > .../ras/ras_ctrl * + * Parameters: + * * op: disable, enable, inject * disable: only block is needed * enable: block and error are needed @@ -265,8 +271,10 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count * * .. note:: - * Operation is only allowed on blocks which are supported. + * Operations are only allowed on blocks which are supported. * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask + * to see which blocks support RAS on a particular asic. + * */ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) @@ -322,7 +330,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * * DOC: AMDGPU RAS debugfs EEPROM table reset interface * * Some boards contain an EEPROM which is used to persistently store a list of - * bad pages containing ECC errors detected in vram. This interface provides + * bad pages which experiences ECC errors in vram. This interface provides * a way to reset the EEPROM, e.g., after testing error injection. * * Usage: @@ -362,7 +370,7 @@ static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = { /** * DOC: AMDGPU RAS sysfs Error Count Interface * - * It allows user to read the error count for each IP block on the gpu through + * It allows the user to read the error count for each IP block on the gpu through * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count * * It outputs the multiple lines which report the uncorrected (ue) and corrected @@ -1027,6 +1035,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) } /* sysfs end */ +/** + * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors + * + * Normally when there is an uncorrectable error, the driver will reset + * the GPU to recover. However, in the event of an unrecoverable error, + * the driver provides an interface to reboot the system automatically + * in that event. + * + * The following file in debugfs provides that interface: + * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot + * + * Usage: + * + * .. code-block:: bash + * + * echo true > .../ras/auto_reboot + * + */ /* debugfs begin */ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 7de16c0c2f20..2a8e04895595 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -27,7 +27,8 @@ #include <linux/bits.h> #include "smu_v11_0_i2c.h" -#define EEPROM_I2C_TARGET_ADDR 0xA0 +#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 +#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 /* * The 2 macros bellow represent the actual size in bytes that @@ -83,7 +84,7 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control, { int ret = 0; struct i2c_msg msg = { - .addr = EEPROM_I2C_TARGET_ADDR, + .addr = 0, .flags = 0, .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, .buf = buff, @@ -93,6 +94,8 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control, *(uint16_t *)buff = EEPROM_HDR_START; __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE); + msg.addr = control->i2c_address; + ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); if (ret < 1) DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret); @@ -203,7 +206,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; struct i2c_msg msg = { - .addr = EEPROM_I2C_TARGET_ADDR, + .addr = 0, .flags = I2C_M_RD, .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, .buf = buff, @@ -213,10 +216,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) switch (adev->asic_type) { case CHIP_VEGA20: + control->i2c_address = EEPROM_I2C_TARGET_ADDR_VEGA20; ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor); break; case CHIP_ARCTURUS: + control->i2c_address = EEPROM_I2C_TARGET_ADDR_ARCTURUS; ret = smu_i2c_eeprom_init(&adev->smu, &control->eeprom_accessor); break; @@ -229,6 +234,8 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) return ret; } + msg.addr = control->i2c_address; + /* Read/Create table header from EEPROM address 0 */ ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); if (ret < 1) { @@ -408,8 +415,8 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, * Update bits 16,17 of EEPROM address in I2C address by setting them * to bits 1,2 of Device address byte */ - msg->addr = EEPROM_I2C_TARGET_ADDR | - ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15); + msg->addr = control->i2c_address | + ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15); msg->flags = write ? 0 : I2C_M_RD; msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE; msg->buf = buff; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 622269957c1b..ca78f812d436 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -50,6 +50,7 @@ struct amdgpu_ras_eeprom_control { struct mutex tbl_mutex; bool bus_locked; uint32_t tbl_byte_sum; + uint16_t i2c_address; // 8-bit represented address }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index c8793e6cc3c5..6373bfb47d55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -124,13 +124,12 @@ int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws) */ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev) { - volatile u32 *dst_ptr; u32 dws; int r; /* allocate clear state block */ adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev); - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->gfx.rlc.clear_state_obj, &adev->gfx.rlc.clear_state_gpu_addr, @@ -141,13 +140,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev) return r; } - /* set up the cs buffer */ - dst_ptr = adev->gfx.rlc.cs_ptr; - adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index b66d29d5ffa2..b158230af8db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -138,6 +138,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(vram_obj, &vram_map); if (r) { @@ -183,6 +184,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(gtt_obj[i], >t_map); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index f940526c5889..63e734a125fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -473,7 +473,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence), TP_ARGS(sched_job, fence), TP_STRUCT__entry( - __string(ring, sched_job->base.sched->name); + __string(ring, sched_job->base.sched->name) __field(uint64_t, id) __field(struct dma_fence *, fence) __field(uint64_t, ctx) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d5d916169226..61d9b7774d42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1906,9 +1906,6 @@ void amdgpu_ttm_late_init(struct amdgpu_device *adev) void *stolen_vga_buf; /* return the VGA stolen memory (if any) back to VRAM */ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); - - /* return the IP Discovery TMR memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); } /** @@ -1921,7 +1918,10 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_ttm_debugfs_fini(adev); amdgpu_ttm_training_reserve_vram_fini(adev); + /* return the IP Discovery TMR memory back to VRAM */ + amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); + if (adev->mman.aper_base_kaddr) iounmap(adev->mman.aper_base_kaddr); adev->mman.aper_base_kaddr = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index b4dd89af6f09..e324bfe6c58f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -299,6 +299,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) { int i, j; + cancel_delayed_work_sync(&adev->uvd.idle_work); drm_sched_entity_destroy(&adev->uvd.entity); for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 703677f2ff6f..46b590af2fd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -216,6 +216,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) if (adev->vce.vcpu_bo == NULL) return 0; + cancel_delayed_work_sync(&adev->vce.idle_work); drm_sched_entity_destroy(&adev->vce.entity); amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 3199e4a5ff12..9d870444d7d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -193,6 +193,8 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) { int i, j; + cancel_delayed_work_sync(&adev->vcn.idle_work); + if (adev->vcn.indirect_sram) { amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, &adev->vcn.dpg_sram_gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e775f271f1ed..598c24505c73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1418,6 +1418,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, uint64_t incr, entry_end, pe_start; struct amdgpu_bo *pt; + /* make sure that the page tables covering the address range are + * actually allocated + */ r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor, params->direct); if (r) @@ -1491,7 +1494,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, } while (frag_start < entry_end); if (amdgpu_vm_pt_descendant(adev, &cursor)) { - /* Free all child entries */ + /* Free all child entries. + * Update the tables with the flags and addresses and free up subsequent + * tables in the case of huge pages or freed up areas. + * This is the maximum you can free, because all other page tables are not + * completely covered by the range and so potentially still in use. + */ while (cursor.pfn < frag_start) { amdgpu_vm_free_pts(adev, params->vm, &cursor); amdgpu_vm_pt_next(adev, &cursor); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 00371713c671..61d13d8b7b20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -274,22 +274,55 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) { int ret = 0; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + struct amdgpu_device *tmp_adev; + bool update_hive_pstate = true; + bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20; if (!hive) return 0; - if (hive->pstate == pstate) - return 0; + mutex_lock(&hive->hive_lock); + + if (hive->pstate == pstate) { + adev->pstate = is_high_pstate ? pstate : adev->pstate; + goto out; + } dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); if (is_support_sw_smu_xgmi(adev)) ret = smu_set_xgmi_pstate(&adev->smu, pstate); - if (ret) + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_xgmi_pstate) + ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, + pstate); + + if (ret) { dev_err(adev->dev, "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret); + goto out; + } + + /* Update device pstate */ + adev->pstate = pstate; + + /* + * Update the hive pstate only all devices of the hive + * are in the same pstate + */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + if (tmp_adev->pstate != adev->pstate) { + update_hive_pstate = false; + break; + } + } + if (update_hive_pstate || is_high_pstate) + hive->pstate = pstate; + +out: + mutex_unlock(&hive->hive_lock); return ret; } @@ -364,6 +397,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit; } + /* Set default device pstate */ + adev->pstate = -1; + top_info = &adev->psp.xgmi_context.top_info; list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 2d64d270725d..b22a10b2d201 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1346,10 +1346,13 @@ static int cik_asic_reset(struct amdgpu_device *adev) { int r; - if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) + if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); r = smu7_asic_baco_reset(adev); - else + } else { r = cik_asic_pci_config_reset(adev); + } return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index e7bab4f094b3..f2c1b026397b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -564,6 +564,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) +{ + adev->gfx.cp_fw_write_wait = false; + + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_NAVI14: + if ((adev->gfx.me_fw_version >= 0x00000046) && + (adev->gfx.me_feature_version >= 27) && + (adev->gfx.pfp_fw_version >= 0x00000068) && + (adev->gfx.pfp_feature_version >= 27) && + (adev->gfx.mec_fw_version >= 0x0000005b) && + (adev->gfx.mec_feature_version >= 27)) + adev->gfx.cp_fw_write_wait = true; + break; + default: + break; + } + + if (adev->gfx.cp_fw_write_wait == false) + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ + GRBM requires 1-cycle delay in cp firmware\n"); +} + + static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) { const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -664,59 +690,61 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); - err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->gfx.rlc_fw); - rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; - version_major = le16_to_cpu(rlc_hdr->header.header_version_major); - version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); - if (version_major == 2 && version_minor == 1) - adev->gfx.rlc.is_rlc_v2_1 = true; - - adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); - adev->gfx.rlc.save_and_restore_offset = + if (!amdgpu_sriov_vf(adev)) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + adev->gfx.rlc.save_and_restore_offset = le32_to_cpu(rlc_hdr->save_and_restore_offset); - adev->gfx.rlc.clear_state_descriptor_offset = + adev->gfx.rlc.clear_state_descriptor_offset = le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); - adev->gfx.rlc.avail_scratch_ram_locations = + adev->gfx.rlc.avail_scratch_ram_locations = le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); - adev->gfx.rlc.reg_restore_list_size = + adev->gfx.rlc.reg_restore_list_size = le32_to_cpu(rlc_hdr->reg_restore_list_size); - adev->gfx.rlc.reg_list_format_start = + adev->gfx.rlc.reg_list_format_start = le32_to_cpu(rlc_hdr->reg_list_format_start); - adev->gfx.rlc.reg_list_format_separate_start = + adev->gfx.rlc.reg_list_format_separate_start = le32_to_cpu(rlc_hdr->reg_list_format_separate_start); - adev->gfx.rlc.starting_offsets_start = + adev->gfx.rlc.starting_offsets_start = le32_to_cpu(rlc_hdr->starting_offsets_start); - adev->gfx.rlc.reg_list_format_size_bytes = + adev->gfx.rlc.reg_list_format_size_bytes = le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); - adev->gfx.rlc.reg_list_size_bytes = + adev->gfx.rlc.reg_list_size_bytes = le32_to_cpu(rlc_hdr->reg_list_size_bytes); - adev->gfx.rlc.register_list_format = + adev->gfx.rlc.register_list_format = kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + - adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); - if (!adev->gfx.rlc.register_list_format) { - err = -ENOMEM; - goto out; - } + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + if (!adev->gfx.rlc.register_list_format) { + err = -ENOMEM; + goto out; + } - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) - adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); - adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) - adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); - if (adev->gfx.rlc.is_rlc_v2_1) - gfx_v10_0_init_rlc_ext_microcode(adev); + if (adev->gfx.rlc.is_rlc_v2_1) + gfx_v10_0_init_rlc_ext_microcode(adev); + } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); @@ -832,6 +860,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) } } + gfx_v10_0_check_fw_write_wait(adev); out: if (err) { dev_err(adev->dev, @@ -966,39 +995,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v10_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -1758,27 +1754,18 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); } -static void gfx_v10_0_init_csb(struct amdgpu_device *adev) +static int gfx_v10_0_init_csb(struct amdgpu_device *adev) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); + /* csib */ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, adev->gfx.rlc.clear_state_gpu_addr >> 32); WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); -} - -static void gfx_v10_0_init_pg(struct amdgpu_device *adev) -{ - int i; - - gfx_v10_0_init_csb(adev); - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); - - /* TODO: init power gating */ - return; + return 0; } void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) @@ -1873,18 +1860,16 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) { int r; - if (amdgpu_sriov_vf(adev)) - return 0; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); if (r) return r; - gfx_v10_0_init_pg(adev); - /* enable RLC SRM */ - gfx_v10_0_rlc_enable_srm(adev); + gfx_v10_0_init_csb(adev); + if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ + gfx_v10_0_rlc_enable_srm(adev); } else { adev->gfx.rlc.funcs->stop(adev); @@ -1906,7 +1891,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) return r; } - gfx_v10_0_init_pg(adev); + gfx_v10_0_init_csb(adev); + adev->gfx.rlc.funcs->start(adev); if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { @@ -2373,7 +2359,7 @@ static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) return 0; } -static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) +static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) { int i; u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); @@ -2386,7 +2372,17 @@ static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) adev->gfx.gfx_ring[i].sched.ready = false; } WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); - udelay(50); + + for (i = 0; i < adev->usec_timeout; i++) { + if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); + + return 0; } static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) @@ -2757,7 +2753,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) /* Init gfx ring 0 for pipe 0 */ mutex_lock(&adev->srbm_mutex); gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); - mutex_unlock(&adev->srbm_mutex); + /* Set ring buffer size */ ring = &adev->gfx.gfx_ring[0]; rb_bufsz = order_base_2(ring->ring_size / 8); @@ -2795,11 +2791,11 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1); gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + mutex_unlock(&adev->srbm_mutex); /* Init gfx ring 1 for pipe 1 */ mutex_lock(&adev->srbm_mutex); gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1); - mutex_unlock(&adev->srbm_mutex); ring = &adev->gfx.gfx_ring[1]; rb_bufsz = order_base_2(ring->ring_size / 8); tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); @@ -2829,6 +2825,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1); gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + mutex_unlock(&adev->srbm_mutex); /* Switch to pipe 0 */ mutex_lock(&adev->srbm_mutex); @@ -3087,6 +3084,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct v10_gfx_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.gfx_ring[0]; if (!adev->in_gpu_reset && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); @@ -3098,12 +3096,12 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) #endif nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) - memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd)); + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else if (adev->in_gpu_reset) { /* reset mqd with the backup copy */ - if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) - memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd)); + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; adev->wb.wb[ring->wptr_offs] = 0; @@ -3706,10 +3704,6 @@ static int gfx_v10_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = gfx_v10_0_csb_vram_pin(adev); - if (r) - return r; - if (!amdgpu_emu_mode) gfx_v10_0_init_golden_registers(adev); @@ -3792,12 +3786,11 @@ static int gfx_v10_0_hw_fini(void *handle) if (amdgpu_gfx_disable_kcq(adev)) DRM_ERROR("KCQ disable failed\n"); if (amdgpu_sriov_vf(adev)) { - pr_debug("For SRIOV client, shouldn't do anything.\n"); + gfx_v10_0_cp_gfx_enable(adev, false); return 0; } gfx_v10_0_cp_enable(adev, false); gfx_v10_0_enable_gui_idle_interrupt(adev, false); - gfx_v10_0_csb_vram_unpin(adev); return 0; } @@ -4766,6 +4759,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); } +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + struct amdgpu_device *adev = ring->adev; + bool fw_version_ok = false; + + fw_version_ok = adev->gfx.cp_fw_write_wait; + + if (fw_version_ok) + gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, + ref, mask, 0x20); + else + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, + ref, mask); +} + static void gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -5156,6 +5167,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_tmz = gfx_v10_0_ring_emit_tmz, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { @@ -5189,6 +5201,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { @@ -5219,6 +5232,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { .emit_rreg = gfx_v10_0_ring_emit_rreg, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 791ba398f007..d92e92e5d50b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4554,6 +4554,8 @@ static int gfx_v7_0_hw_init(void *handle) gfx_v7_0_constants_init(adev); + /* init CSB */ + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* init rlc */ r = adev->gfx.rlc.funcs->resume(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index ffbde9136372..983db77999e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1321,39 +1321,6 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -3917,6 +3884,7 @@ static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v8_0_init_csb(struct amdgpu_device *adev) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ WREG32(mmRLC_CSIB_ADDR_HI, adev->gfx.rlc.clear_state_gpu_addr >> 32); @@ -4837,10 +4805,6 @@ static int gfx_v8_0_hw_init(void *handle) gfx_v8_0_init_golden_registers(adev); gfx_v8_0_constants_init(adev); - r = gfx_v8_0_csb_vram_pin(adev); - if (r) - return r; - r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -4958,8 +4922,6 @@ static int gfx_v8_0_hw_fini(void *handle) pr_err("rlc is busy, skip halt rlc\n"); amdgpu_gfx_rlc_exit_safe_mode(adev); - gfx_v8_0_csb_vram_unpin(adev); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 39297baedfb4..66328ffa395a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -704,6 +704,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00), }; static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = @@ -986,6 +987,13 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) adev->gfx.me_fw_write_wait = false; adev->gfx.mec_fw_write_wait = false; + if ((adev->gfx.mec_fw_version < 0x000001a5) || + (adev->gfx.mec_feature_version < 46) || + (adev->gfx.pfp_fw_version < 0x000000b7) || + (adev->gfx.pfp_feature_version < 46)) + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ + GRBM requires 1-cycle delay in cp firmware\n"); + switch (adev->asic_type) { case CHIP_VEGA10: if ((adev->gfx.me_fw_version >= 0x0000009c) && @@ -1044,8 +1052,13 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) case CHIP_VEGA20: break; case CHIP_RAVEN: - if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) - &&((adev->gfx.rlc_fw_version != 106 && + /* Disable GFXOFF on original raven. There are combinations + * of sbios and platforms that are not stable. + */ + if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + &&((adev->gfx.rlc_fw_version != 106 && adev->gfx.rlc_fw_version < 531) || (adev->gfx.rlc_fw_version == 53815) || (adev->gfx.rlc_feature_version < 1) || @@ -1057,6 +1070,12 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_RLC_SMU_HS; break; + case CHIP_RENOIR: + if (adev->pm.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; + break; default: break; } @@ -1676,39 +1695,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -2396,6 +2382,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_0_init_csb(struct amdgpu_device *adev) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), adev->gfx.rlc.clear_state_gpu_addr >> 32); @@ -2725,7 +2712,10 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) * And it's needed by gfxoff feature. */ if (adev->gfx.rlc.is_rlc_v2_1) { - gfx_v9_1_init_rlc_save_restore_list(adev); + if (adev->asic_type == CHIP_VEGA12 || + (adev->asic_type == CHIP_RAVEN && + adev->rev_id >= 8)) + gfx_v9_1_init_rlc_save_restore_list(adev); gfx_v9_0_enable_save_restore_machine(adev); } @@ -3684,10 +3674,6 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_constants_init(adev); - r = gfx_v9_0_csb_vram_pin(adev); - if (r) - return r; - r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -3769,8 +3755,6 @@ static int gfx_v9_0_hw_fini(void *handle) gfx_v9_0_cp_enable(adev, false); adev->gfx.rlc.funcs->stop(adev); - gfx_v9_0_csb_vram_unpin(adev); - return 0; } @@ -3876,9 +3860,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) uint64_t clock; mutex_lock(&adev->gfx.gpu_clock_mutex); - WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); - clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | - ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { + uint32_t tmp, lsb, msb, i = 0; + do { + if (i != 0) + udelay(1); + tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB); + msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + i++; + } while (unlikely(tmp != msb) && (i < adev->usec_timeout)); + clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL); + } else { + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + } mutex_unlock(&adev->gfx.gpu_clock_mutex); return clock; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 9ec4297e61e5..e91bd7945777 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -367,6 +367,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c index 5e9ab8eb214a..c0ab71df0d90 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c @@ -33,16 +33,31 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL); u32 max_region = REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION); + u32 max_num_physical_nodes = 0; + u32 max_physical_node_id = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + max_num_physical_nodes = 4; + max_physical_node_id = 3; + break; + case CHIP_ARCTURUS: + max_num_physical_nodes = 8; + max_physical_node_id = 7; + break; + default: + return -EINVAL; + } /* PF_MAX_REGION=0 means xgmi is disabled */ if (max_region) { adev->gmc.xgmi.num_physical_nodes = max_region + 1; - if (adev->gmc.xgmi.num_physical_nodes > 4) + if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) return -EINVAL; adev->gmc.xgmi.physical_node_id = REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION); - if (adev->gmc.xgmi.physical_node_id > 3) + if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) return -EINVAL; adev->gmc.xgmi.node_segment_size = REG_GET_FIELD( RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE), diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index b4f32d853ca1..b70c7b483c24 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -356,6 +356,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 6e1b25bd1fe7..232469507446 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -235,6 +235,29 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, const unsigned eng = 17; unsigned int i; + spin_lock(&adev->gmc.invalidate_lock); + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) { + for (i = 0; i < adev->usec_timeout; i++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + } + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* @@ -254,6 +277,17 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, udelay(1); } + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + + spin_unlock(&adev->gmc.invalidate_lock); + if (i < adev->usec_timeout) return; @@ -292,7 +326,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready || - adev->in_gpu_reset) { + adev->in_gpu_reset || + ring->sched.ready == false) { gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0); mutex_unlock(&adev->mman.gtt_window_lock); return; @@ -338,17 +373,38 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, + hub->vm_inv_eng0_ack + eng, + req, 1 << vmid); - /* wait for the invalidate to complete */ - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, - 1 << vmid, 1 << vmid); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); return pd_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 9f2a893871ec..3c355fb5d2b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -459,6 +459,29 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } spin_lock(&adev->gmc.invalidate_lock); + + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) { + for (j = 0; j < adev->usec_timeout; j++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (j >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + } + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* @@ -474,7 +497,18 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, break; udelay(1); } + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + spin_unlock(&adev->gmc.invalidate_lock); + if (j < adev->usec_timeout) return; @@ -489,6 +523,20 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); @@ -499,6 +547,15 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, hub->vm_inv_eng0_ack + eng, req, 1 << vmid); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + return pd_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 6965e1e6fa9e..28105e4af507 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -420,6 +420,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 945533634711..a7cb185d639a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -348,6 +348,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 657970f9ebfb..66efe2f7bd76 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -219,6 +219,15 @@ static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid) hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); @@ -495,6 +504,10 @@ void mmhub_v9_4_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_SEM) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; hub[i]->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVML2VC0_VM_INVALIDATE_ENG0_REQ) + diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index a55a2e83fb19..0ba66bef5746 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -40,6 +40,7 @@ #include "gc/gc_10_1_0_sh_mask.h" #include "hdp/hdp_5_0_0_offset.h" #include "hdp/hdp_5_0_0_sh_mask.h" +#include "smuio/smuio_11_0_0_offset.h" #include "soc15.h" #include "soc15_common.h" @@ -156,8 +157,27 @@ static bool nv_read_disabled_bios(struct amdgpu_device *adev) static bool nv_read_bios_from_rom(struct amdgpu_device *adev, u8 *bios, u32 length_bytes) { - /* TODO: will implement it when SMU header is available */ - return false; + u32 *dw_ptr; + u32 i, length_dw; + + if (bios == NULL) + return false; + if (length_bytes == 0) + return false; + /* APU vbios image is part of sbios image */ + if (adev->flags & AMD_IS_APU) + return false; + + dw_ptr = (u32 *)bios; + length_dw = ALIGN(length_bytes, 4) / 4; + + /* set rom index to 0 */ + WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0); + /* read out the rom data */ + for (i = 0; i < length_dw; i++) + dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA)); + + return true; } static struct soc15_allowed_register_entry nv_allowed_read_registers[] = { @@ -539,6 +559,16 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev) return false; } +static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev) +{ + + /* TODO + * dummy implement for pcie_replay_count sysfs interface + * */ + + return 0; +} + static void nv_init_doorbell_index(struct amdgpu_device *adev) { adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ; @@ -586,6 +616,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = .need_full_reset = &nv_need_full_reset, .get_pcie_usage = &nv_get_pcie_usage, .need_reset_on_init = &nv_need_reset_on_init, + .get_pcie_replay_count = &nv_get_pcie_replay_count, }; static int nv_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index b8fdb192f6d6..f4ad2990f973 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1173,6 +1173,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); } +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + amdgpu_ring_emit_wreg(ring, reg0, ref); + /* wait for a cycle to reset vm_inv_eng*_ack */ + amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); +} + static int sdma_v5_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1588,7 +1598,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { 6 + /* sdma_v5_0_ring_emit_pipeline_sync */ /* sdma_v5_0_ring_emit_vm_flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ .emit_ib = sdma_v5_0_ring_emit_ib, @@ -1602,6 +1612,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .pad_ib = sdma_v5_0_ring_pad_ib, .emit_wreg = sdma_v5_0_ring_emit_wreg, .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_0_ring_init_cond_exec, .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, .preempt_ib = sdma_v5_0_ring_preempt_ib, diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 57bb5f9e08b2..88ae27a5a03d 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -64,7 +64,8 @@ static int si_ih_irq_init(struct amdgpu_device *adev) u32 interrupt_cntl, ih_cntl, ih_rb_cntl; si_ih_disable_interrupts(adev); - WREG32(INTERRUPT_CNTL2, adev->irq.ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, adev->dummy_page_addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 741b564b4aa5..8e1640bc07af 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1145,7 +1145,9 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; } else if (adev->pdev->device == 0x15d8) { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | @@ -1188,7 +1190,9 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; } break; case CHIP_ARCTURUS: @@ -1233,11 +1237,6 @@ static int soc15_common_early_init(void *handle) AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG; adev->external_rev_id = adev->rev_id + 0x91; - - if (adev->pm.pp_feature & PP_GFXOFF_MASK) - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | - AMD_PG_SUPPORT_CP | - AMD_PG_SUPPORT_RLC_SMU_HS; break; default: /* FIXME: not supported yet */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 9af6c6ffbfa2..57af489a5de3 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -28,8 +28,8 @@ #include "nbio_v7_0.h" #include "nbio_v7_4.h" -#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4 -#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1 +#define SOC15_FLUSH_GPU_TLB_NUM_WREG 6 +#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 3 extern const struct amd_ip_funcs soc15_common_ip_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 03083e5f731a..93edf9193a7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -293,7 +293,7 @@ static int vcn_v2_5_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i; + int i, j; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -305,8 +305,8 @@ static int vcn_v2_5_hw_fini(void *handle) ring->sched.ready = false; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.inst[i].ring_enc[i]; + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; ring->sched.ready = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 78e5cdc0c058..f1b171e30774 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -783,10 +783,13 @@ static int vi_asic_reset(struct amdgpu_device *adev) { int r; - if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) + if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); r = smu7_asic_baco_reset(adev); - else + } else { r = vi_asic_pci_config_reset(adev); + } return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index a1a35d4d594b..ba0e68057a89 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -5,7 +5,7 @@ config HSA_AMD bool "HSA kernel driver for AMD GPU devices" - depends on DRM_AMDGPU && (X86_64 || ARM64) + depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64) imply AMD_IOMMU_V2 if X86_64 select MMU_NOTIFIER help diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0a6173d727e3..7aac9568d3be 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -719,7 +719,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) */ if (adev->flags & AMD_IS_APU && adev->asic_type >= CHIP_CARRIZO && - adev->asic_type <= CHIP_RAVEN) + adev->asic_type < CHIP_RAVEN) init_data.flags.gpu_vm_support = true; if (amdgpu_dc_feature_mask & DC_FBC_MASK) @@ -4239,8 +4239,8 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec result = MODE_OK; else DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d\n", - mode->vdisplay, mode->hdisplay, + mode->vdisplay, mode->clock, dc_result); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 49cf39711dfc..2bf8534c18fb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -36,7 +36,9 @@ #include "dc_link_ddc.h" #include "i2caux_interface.h" - +#if defined(CONFIG_DEBUG_FS) +#include "amdgpu_dm_debugfs.h" +#endif /* #define TRACE_DPCD */ #ifdef TRACE_DPCD @@ -147,6 +149,12 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector) to_amdgpu_dm_connector(connector); struct drm_dp_mst_port *port = amdgpu_dm_connector->port; +#if defined(CONFIG_DEBUG_FS) + connector_debugfs_init(amdgpu_dm_connector); + amdgpu_dm_connector->debugfs_dpcd_address = 0; + amdgpu_dm_connector->debugfs_dpcd_size = 0; +#endif + return drm_dp_mst_connector_late_register(connector, port); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index 55a520a63712..778f186b3a05 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -342,7 +342,8 @@ bool dm_pp_get_clock_levels_by_type( if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_clock_by_type) { if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle, dc_to_pp_clock_type(clk_type), &pp_clks)) { - /* Error in pplib. Provide default values. */ + /* Error in pplib. Provide default values. */ + get_default_clock_levels(clk_type, dc_clks); return true; } } else if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->get_clock_by_type) { diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 985633c08a26..26c6d735cdc7 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -24,15 +24,20 @@ # It calculates Bandwidth and Watermarks values for HW programming # -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +calcs_ccflags := -mhard-float -msse -calcs_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +calcs_ccflags += -mpreferred-stack-boundary=4 +else calcs_ccflags += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index cc94c1a73daa..12ba6fdf89b7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3027,15 +3027,6 @@ void core_link_enable_stream( CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, COLOR_DEPTH_UNDEFINED); - /* This second call is needed to reconfigure the DIG - * as a workaround for the incorrect value being applied - * from transmitter control. - */ - if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) - stream->link->link_enc->funcs->setup( - stream->link->link_enc, - pipe_ctx->stream->signal); - #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT if (pipe_ctx->stream->timing.flags.DSC) { if (dc_is_dp_signal(pipe_ctx->stream->signal) || diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index ddb8d5649e79..63f3bddba7da 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -10,15 +10,20 @@ ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT DCN20 += dcn20_dsc.o endif -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4 +else CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 921a36668ced..ac8c18fadefc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1037,6 +1037,25 @@ void dcn20_pipe_control_lock( if (pipe->plane_state != NULL) flip_immediate = pipe->plane_state->flip_immediate; + if (flip_immediate && lock) { + const int TIMEOUT_FOR_FLIP_PENDING = 100000; + int i; + + for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) { + if (!pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->plane_res.hubp)) + break; + udelay(1); + } + + if (pipe->bottom_pipe != NULL) { + for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) { + if (!pipe->bottom_pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->bottom_pipe->plane_res.hubp)) + break; + udelay(1); + } + } + } + /* In flip immediate and pipe splitting case, we need to use GSL * for synchronization. Only do setup on locking and on flip type change. */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 924c2e303588..09793336d84f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -157,6 +157,74 @@ struct _vcs_dpi_ip_params_st dcn2_0_ip = { .xfc_fill_constant_bytes = 0, }; +struct _vcs_dpi_ip_params_st dcn2_0_nv14_ip = { + .odm_capable = 1, + .gpuvm_enable = 0, + .hostvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_max_page_table_levels = 4, + .hostvm_cached_page_table_levels = 0, + .num_dsc = 5, + .rob_buffer_size_kbytes = 168, + .det_buffer_size_kbytes = 164, + .dpte_buffer_size_in_pte_reqs_luma = 84, + .dpte_buffer_size_in_pte_reqs_chroma = 42,//todo + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .pte_enable = 1, + .max_page_table_levels = 4, + .pte_chunk_size_kbytes = 2, + .meta_chunk_size_kbytes = 2, + .writeback_chunk_size_kbytes = 2, + .line_buffer_size_bits = 789504, + .is_line_buffer_bpp_fixed = 0, + .line_buffer_fixed_bpp = 0, + .dcc_supported = true, + .max_line_buffer_lines = 12, + .writeback_luma_buffer_size_kbytes = 12, + .writeback_chroma_buffer_size_kbytes = 8, + .writeback_chroma_line_buffer_width_pixels = 4, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_taps = 12, + .writeback_max_vscl_taps = 12, + .writeback_line_buffer_luma_buffer_size = 0, + .writeback_line_buffer_chroma_buffer_size = 14643, + .cursor_buffer_size = 8, + .cursor_chunk_size = 2, + .max_num_otg = 5, + .max_num_dpp = 5, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 8, + .max_vscl_ratio = 8, + .hscl_mults = 4, + .vscl_mults = 4, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dispclk_ramp_margin_percent = 1, + .underscan_factor = 1.10, + .min_vblank_lines = 32, // + .dppclk_delay_subtotal = 77, // + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_scl = 50, + .dppclk_delay_cnvc_formatter = 8, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 87, // + .dcfclk_cstate_latency = 10, // SRExitTime + .max_inter_dcn_tile_repeaters = 8, + .xfc_supported = true, + .xfc_fill_bw_overhead_percent = 10.0, + .xfc_fill_constant_bytes = 0, + .ptoi_supported = 0 +}; + struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = { /* Defaults that get patched on driver load from firmware. */ .clock_limits = { @@ -854,6 +922,8 @@ static const struct resource_caps res_cap_nv14 = { .num_pll = 5, .num_dwb = 1, .num_ddc = 5, + .num_vmid = 16, + .num_dsc = 5, }; static const struct dc_debug_options debug_defaults_drv = { @@ -1152,6 +1222,11 @@ struct stream_encoder *dcn20_stream_encoder_create( if (!enc1) return NULL; + if (ASICREV_IS_NAVI14_M(ctx->asic_id.hw_internal_rev)) { + if (eng_id >= ENGINE_ID_DIGD) + eng_id++; + } + dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, &stream_enc_regs[eng_id], &se_shift, &se_mask); @@ -3207,6 +3282,10 @@ static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb( static struct _vcs_dpi_ip_params_st *get_asic_rev_ip_params( uint32_t hw_internal_rev) { + /* NV14 */ + if (ASICREV_IS_NAVI14_M(hw_internal_rev)) + return &dcn2_0_nv14_ip; + /* NV12 and NV10 */ return &dcn2_0_ip; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 19fabac13c65..14113ccf498d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -3,15 +3,20 @@ DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o dcn21_hwseq.o dcn21_link_encoder.o -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4 +else CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 5b2a65b42403..8df251626e22 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -24,15 +24,20 @@ # It provides the general basic services required by other DAL # subcomponents. -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +dml_ccflags := -mhard-float -msse -dml_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +dml_ccflags += -mpreferred-stack-boundary=4 +else dml_ccflags += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index b456cd23c6fa..970737217e53 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -1,15 +1,20 @@ # # Makefile for the 'dsc' sub-component of DAL. -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +dsc_ccflags := -mhard-float -msse -dsc_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +dsc_ccflags += -mpreferred-stack-boundary=4 +else dsc_ccflags += -msse2 endif diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 5902f80d1fce..a7f92d0b3a90 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -220,6 +220,9 @@ enum pp_df_cstate { ((group) << PP_GROUP_SHIFT | (block) << PP_BLOCK_SHIFT | \ (support) << PP_STATE_SUPPORT_SHIFT | (state) << PP_STATE_SHIFT) +#define XGMI_MODE_PSTATE_D3 0 +#define XGMI_MODE_PSTATE_D0 1 + struct seq_file; enum amd_pp_clock_type; struct amd_pp_simple_clock_info; @@ -318,6 +321,7 @@ struct amd_pm_funcs { int (*set_ppfeature_status)(void *handle, uint64_t ppfeature_masks); int (*asic_reset_mode_2)(void *handle); int (*set_df_cstate)(void *handle, enum pp_df_cstate state); + int (*set_xgmi_pstate)(void *handle, uint32_t pstate); }; #endif diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index f4ff15378e61..7932eb163a00 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -969,6 +969,14 @@ static int pp_dpm_switch_power_profile(void *handle, workload = hwmgr->workload_setting[index]; } + if (type == PP_SMC_POWER_PROFILE_COMPUTE && + hwmgr->hwmgr_func->disable_power_features_for_compute_performance) { + if (hwmgr->hwmgr_func->disable_power_features_for_compute_performance(hwmgr, en)) { + mutex_unlock(&hwmgr->smu_lock); + return -EINVAL; + } + } + if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) hwmgr->hwmgr_func->set_power_profile_mode(hwmgr, &workload, 0); mutex_unlock(&hwmgr->smu_lock); @@ -1566,6 +1574,23 @@ static int pp_set_df_cstate(void *handle, enum pp_df_cstate state) return 0; } +static int pp_set_xgmi_pstate(void *handle, uint32_t pstate) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr) + return -EINVAL; + + if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_xgmi_pstate) + return 0; + + mutex_lock(&hwmgr->smu_lock); + hwmgr->hwmgr_func->set_xgmi_pstate(hwmgr, pstate); + mutex_unlock(&hwmgr->smu_lock); + + return 0; +} + static const struct amd_pm_funcs pp_dpm_funcs = { .load_firmware = pp_dpm_load_fw, .wait_for_fw_loading_complete = pp_dpm_fw_loading_complete, @@ -1625,4 +1650,5 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .asic_reset_mode_2 = pp_asic_reset_mode_2, .smu_i2c_bus_access = pp_smu_i2c_bus_access, .set_df_cstate = pp_set_df_cstate, + .set_xgmi_pstate = pp_set_xgmi_pstate, }; diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 4e468b0272c3..5ff7ccedfbed 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -383,14 +383,25 @@ bool smu_clk_dpm_is_enabled(struct smu_context *smu, enum smu_clk_type clk_type) return true; } - +/** + * smu_dpm_set_power_gate - power gate/ungate the specific IP block + * + * @smu: smu_context pointer + * @block_type: the IP block to power gate/ungate + * @gate: to power gate if true, ungate otherwise + * + * This API uses no smu->mutex lock protection due to: + * 1. It is either called by other IP block(gfx/sdma/vcn/uvd/vce). + * This is guarded to be race condition free by the caller. + * 2. Or get called on user setting request of power_dpm_force_performance_level. + * Under this case, the smu->mutex lock protection is already enforced on + * the parent API smu_force_performance_level of the call path. + */ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, bool gate) { int ret = 0; - mutex_lock(&smu->mutex); - switch (block_type) { case AMD_IP_BLOCK_TYPE_UVD: ret = smu_dpm_set_uvd_enable(smu, gate); @@ -408,8 +419,6 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, break; } - mutex_unlock(&smu->mutex); - return ret; } @@ -526,7 +535,7 @@ bool is_support_sw_smu(struct amdgpu_device *adev) bool is_support_sw_smu_xgmi(struct amdgpu_device *adev) { - if (amdgpu_dpm != 1) + if (!is_support_sw_smu(adev)) return false; if (adev->asic_type == CHIP_VEGA20) @@ -582,10 +591,18 @@ int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size) smu_table->power_play_table = smu_table->hardcode_pptable; smu_table->power_play_table_size = size; + /* + * Special hw_fini action(for Navi1x, the DPMs disablement will be + * skipped) may be needed for custom pptable uploading. + */ + smu->uploading_custom_pp_table = true; + ret = smu_reset(smu); if (ret) pr_info("smu reset failed, ret = %d\n", ret); + smu->uploading_custom_pp_table = false; + failed: mutex_unlock(&smu->mutex); return ret; @@ -705,8 +722,12 @@ static int smu_set_funcs(struct amdgpu_device *adev) { struct smu_context *smu = &adev->smu; + if (adev->pm.pp_feature & PP_OVERDRIVE_MASK) + smu->od_enabled = true; + switch (adev->asic_type) { case CHIP_VEGA20: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; vega20_set_ppt_funcs(smu); break; case CHIP_NAVI10: @@ -715,7 +736,10 @@ static int smu_set_funcs(struct amdgpu_device *adev) navi10_set_ppt_funcs(smu); break; case CHIP_ARCTURUS: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; arcturus_set_ppt_funcs(smu); + /* OD is not supported on Arcturus */ + smu->od_enabled =false; break; case CHIP_RENOIR: renoir_set_ppt_funcs(smu); @@ -724,9 +748,6 @@ static int smu_set_funcs(struct amdgpu_device *adev) return -EINVAL; } - if (adev->pm.pp_feature & PP_OVERDRIVE_MASK) - smu->od_enabled = true; - return 0; } @@ -1057,10 +1078,6 @@ static int smu_smc_table_hw_init(struct smu_context *smu, return ret; if (adev->asic_type != CHIP_ARCTURUS) { - ret = smu_override_pcie_parameters(smu); - if (ret) - return ret; - ret = smu_notify_display_change(smu); if (ret) return ret; @@ -1089,6 +1106,12 @@ static int smu_smc_table_hw_init(struct smu_context *smu, return ret; } + if (adev->asic_type != CHIP_ARCTURUS) { + ret = smu_override_pcie_parameters(smu); + if (ret) + return ret; + } + ret = smu_set_default_od_settings(smu, initialize); if (ret) return ret; @@ -1098,7 +1121,7 @@ static int smu_smc_table_hw_init(struct smu_context *smu, if (ret) return ret; - ret = smu_get_power_limit(smu, &smu->default_power_limit, true, false); + ret = smu_get_power_limit(smu, &smu->default_power_limit, false, false); if (ret) return ret; } @@ -1282,10 +1305,25 @@ static int smu_hw_fini(void *handle) return ret; } - ret = smu_stop_dpms(smu); - if (ret) { - pr_warn("Fail to stop Dpms!\n"); - return ret; + /* + * For custom pptable uploading, skip the DPM features + * disable process on Navi1x ASICs. + * - As the gfx related features are under control of + * RLC on those ASICs. RLC reinitialization will be + * needed to reenable them. That will cost much more + * efforts. + * + * - SMU firmware can handle the DPM reenablement + * properly. + */ + if (!smu->uploading_custom_pp_table || + !((adev->asic_type >= CHIP_NAVI10) && + (adev->asic_type <= CHIP_NAVI12))) { + ret = smu_stop_dpms(smu); + if (ret) { + pr_warn("Fail to stop Dpms!\n"); + return ret; + } } kfree(table_context->driver_pptable); @@ -2500,3 +2538,22 @@ int smu_get_dpm_clock_table(struct smu_context *smu, return ret; } + +uint32_t smu_get_pptable_power_limit(struct smu_context *smu) +{ + uint32_t ret = 0; + + if (smu->ppt_funcs->get_pptable_power_limit) + ret = smu->ppt_funcs->get_pptable_power_limit(smu); + + return ret; +} + +int smu_send_smc_msg(struct smu_context *smu, + enum smu_message_type msg) +{ + int ret; + + ret = smu_send_smc_msg_with_param(smu, msg, 0); + return ret; +} diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 3099ac256bd3..ce3566ca3e24 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1261,15 +1261,14 @@ arcturus_get_profiling_clk_mask(struct smu_context *smu, static int arcturus_get_power_limit(struct smu_context *smu, uint32_t *limit, - bool asic_default) + bool cap) { PPTable_t *pptable = smu->smu_table.driver_pptable; uint32_t asic_default_power_limit = 0; int ret = 0; int power_src; - if (!smu->default_power_limit || - !smu->power_limit) { + if (!smu->power_limit) { if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC); if (power_src < 0) @@ -1292,17 +1291,11 @@ static int arcturus_get_power_limit(struct smu_context *smu, pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; } - if (smu->od_enabled) { - asic_default_power_limit *= (100 + smu->smu_table.TDPODLimit); - asic_default_power_limit /= 100; - } - - smu->default_power_limit = asic_default_power_limit; smu->power_limit = asic_default_power_limit; } - if (asic_default) - *limit = smu->default_power_limit; + if (cap) + *limit = smu_v11_0_get_max_power_limit(smu); else *limit = smu->power_limit; @@ -2070,6 +2063,13 @@ static void arcturus_i2c_eeprom_control_fini(struct i2c_adapter *control) i2c_del_adapter(control); } +static uint32_t arcturus_get_pptable_power_limit(struct smu_context *smu) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + + return pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; +} + static const struct pptable_funcs arcturus_ppt_funcs = { /* translate smu index into arcturus specific index */ .get_smu_msg_index = arcturus_get_smu_msg_index, @@ -2130,7 +2130,6 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .set_tool_table_location = smu_v11_0_set_tool_table_location, .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, .system_features_control = smu_v11_0_system_features_control, - .send_smc_msg = smu_v11_0_send_msg, .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, .read_smc_arg = smu_v11_0_read_arg, .init_display_count = smu_v11_0_init_display_count, @@ -2160,6 +2159,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .override_pcie_parameters = smu_v11_0_override_pcie_parameters, + .get_pptable_power_limit = arcturus_get_pptable_power_limit, }; void arcturus_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index a24beaa4fb01..d2909c91d65b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -81,6 +81,8 @@ static void hwmgr_init_workload_prority(struct pp_hwmgr *hwmgr) int hwmgr_early_init(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev; + if (!hwmgr) return -EINVAL; @@ -94,8 +96,11 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) hwmgr_init_workload_prority(hwmgr); hwmgr->gfxoff_state_changed_by_workload = false; + adev = hwmgr->adev; + switch (hwmgr->chip_family) { case AMDGPU_FAMILY_CI: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->smumgr_funcs = &ci_smu_funcs; ci_set_asic_special_caps(hwmgr); hwmgr->feature_mask &= ~(PP_VBI_TIME_SUPPORT_MASK | @@ -106,12 +111,14 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) smu7_init_function_pointers(hwmgr); break; case AMDGPU_FAMILY_CZ: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->od_enabled = false; hwmgr->smumgr_funcs = &smu8_smu_funcs; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; smu8_init_function_pointers(hwmgr); break; case AMDGPU_FAMILY_VI: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; switch (hwmgr->chip_id) { case CHIP_TOPAZ: @@ -153,6 +160,7 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) case AMDGPU_FAMILY_AI: switch (hwmgr->chip_id) { case CHIP_VEGA10: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; hwmgr->smumgr_funcs = &vega10_smu_funcs; vega10_hwmgr_init(hwmgr); @@ -162,6 +170,7 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) vega12_hwmgr_init(hwmgr); break; case CHIP_VEGA20: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; hwmgr->smumgr_funcs = &vega20_smu_funcs; vega20_hwmgr_init(hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index c805c6fcdba2..f73dff68e799 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -3477,18 +3477,31 @@ static int smu7_get_pp_table_entry(struct pp_hwmgr *hwmgr, static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr, u32 *query) { + struct amdgpu_device *adev = hwmgr->adev; int i; u32 tmp = 0; if (!query) return -EINVAL; - smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0); - tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); - *query = tmp; + /* + * PPSMC_MSG_GetCurrPkgPwr is not supported on: + * - Hawaii + * - Bonaire + * - Fiji + * - Tonga + */ + if ((adev->asic_type != CHIP_HAWAII) && + (adev->asic_type != CHIP_BONAIRE) && + (adev->asic_type != CHIP_FIJI) && + (adev->asic_type != CHIP_TONGA)) { + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0); + tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); + *query = tmp; - if (tmp != 0) - return 0; + if (tmp != 0) + return 0; + } smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, @@ -3969,6 +3982,13 @@ static int smu7_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input) "Failed to populate and upload SCLK MCLK DPM levels!", result = tmp_result); + /* + * If a custom pp table is loaded, set DPMTABLE_OD_UPDATE_VDDC flag. + * That effectively disables AVFS feature. + */ + if (hwmgr->hardcode_pp_table != NULL) + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; + tmp_result = smu7_update_avfs(hwmgr); PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to update avfs voltages!", diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 4ea63a2e17da..d71a492c87a3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -3689,6 +3689,13 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr, PP_ASSERT_WITH_CODE(!result, "Failed to upload PPtable!", return result); + /* + * If a custom pp table is loaded, set DPMTABLE_OD_UPDATE_VDDC flag. + * That effectively disables AVFS feature. + */ + if(hwmgr->hardcode_pp_table != NULL) + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; + vega10_update_avfs(hwmgr); /* @@ -5263,6 +5270,59 @@ static int vega10_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_ return 0; } +static int vega10_disable_power_features_for_compute_performance(struct pp_hwmgr *hwmgr, bool disable) +{ + struct vega10_hwmgr *data = hwmgr->backend; + uint32_t feature_mask = 0; + + if (disable) { + feature_mask |= data->smu_features[GNLD_ULV].enabled ? + data->smu_features[GNLD_ULV].smu_feature_bitmap : 0; + feature_mask |= data->smu_features[GNLD_DS_GFXCLK].enabled ? + data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0; + feature_mask |= data->smu_features[GNLD_DS_SOCCLK].enabled ? + data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0; + feature_mask |= data->smu_features[GNLD_DS_LCLK].enabled ? + data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0; + feature_mask |= data->smu_features[GNLD_DS_DCEFCLK].enabled ? + data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0; + } else { + feature_mask |= (!data->smu_features[GNLD_ULV].enabled) ? + data->smu_features[GNLD_ULV].smu_feature_bitmap : 0; + feature_mask |= (!data->smu_features[GNLD_DS_GFXCLK].enabled) ? + data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0; + feature_mask |= (!data->smu_features[GNLD_DS_SOCCLK].enabled) ? + data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0; + feature_mask |= (!data->smu_features[GNLD_DS_LCLK].enabled) ? + data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0; + feature_mask |= (!data->smu_features[GNLD_DS_DCEFCLK].enabled) ? + data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0; + } + + if (feature_mask) + PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr, + !disable, feature_mask), + "enable/disable power features for compute performance Failed!", + return -EINVAL); + + if (disable) { + data->smu_features[GNLD_ULV].enabled = false; + data->smu_features[GNLD_DS_GFXCLK].enabled = false; + data->smu_features[GNLD_DS_SOCCLK].enabled = false; + data->smu_features[GNLD_DS_LCLK].enabled = false; + data->smu_features[GNLD_DS_DCEFCLK].enabled = false; + } else { + data->smu_features[GNLD_ULV].enabled = true; + data->smu_features[GNLD_DS_GFXCLK].enabled = true; + data->smu_features[GNLD_DS_SOCCLK].enabled = true; + data->smu_features[GNLD_DS_LCLK].enabled = true; + data->smu_features[GNLD_DS_DCEFCLK].enabled = true; + } + + return 0; + +} + static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .backend_init = vega10_hwmgr_backend_init, .backend_fini = vega10_hwmgr_backend_fini, @@ -5330,6 +5390,8 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .get_ppfeature_status = vega10_get_ppfeature_status, .set_ppfeature_status = vega10_set_ppfeature_status, .set_mp1_state = vega10_set_mp1_state, + .disable_power_features_for_compute_performance = + vega10_disable_power_features_for_compute_performance, }; int vega10_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 9295bd90b792..5bcf0d684151 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -4176,6 +4176,20 @@ static int vega20_set_df_cstate(struct pp_hwmgr *hwmgr, return ret; } +static int vega20_set_xgmi_pstate(struct pp_hwmgr *hwmgr, + uint32_t pstate) +{ + int ret; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetXgmiMode, + pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3); + if (ret) + pr_err("SetXgmiPstate failed!\n"); + + return ret; +} + static const struct pp_hwmgr_func vega20_hwmgr_funcs = { /* init/fini related */ .backend_init = vega20_hwmgr_backend_init, @@ -4245,6 +4259,7 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = { .set_mp1_state = vega20_set_mp1_state, .smu_i2c_bus_access = vega20_smu_i2c_bus_access, .set_df_cstate = vega20_set_df_cstate, + .set_xgmi_pstate = vega20_set_xgmi_pstate, }; int vega20_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 8120e7587585..ac9758305ab3 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -261,7 +261,6 @@ struct smu_table_context struct smu_table *tables; struct smu_table memory_pool; uint8_t thermal_controller_type; - uint16_t TDPODLimit; void *overdrive_table; }; @@ -391,6 +390,7 @@ struct smu_context uint32_t smc_if_version; + bool uploading_custom_pp_table; }; struct i2c_adapter; @@ -497,8 +497,8 @@ struct pptable_funcs { int (*notify_memory_pool_location)(struct smu_context *smu); int (*set_last_dcef_min_deep_sleep_clk)(struct smu_context *smu); int (*system_features_control)(struct smu_context *smu, bool en); - int (*send_smc_msg)(struct smu_context *smu, uint16_t msg); - int (*send_smc_msg_with_param)(struct smu_context *smu, uint16_t msg, uint32_t param); + int (*send_smc_msg_with_param)(struct smu_context *smu, + enum smu_message_type msg, uint32_t param); int (*read_smc_arg)(struct smu_context *smu, uint32_t *arg); int (*init_display_count)(struct smu_context *smu, uint32_t count); int (*set_allowed_mask)(struct smu_context *smu); @@ -548,6 +548,7 @@ struct pptable_funcs { int (*get_dpm_ultimate_freq)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max); int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max); int (*override_pcie_parameters)(struct smu_context *smu); + uint32_t (*get_pptable_power_limit)(struct smu_context *smu); }; int smu_load_microcode(struct smu_context *smu); @@ -717,4 +718,6 @@ int smu_get_uclk_dpm_states(struct smu_context *smu, int smu_get_dpm_clock_table(struct smu_context *smu, struct dpm_clocks *clock_table); +uint32_t smu_get_pptable_power_limit(struct smu_context *smu); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index bd8c922dfd3e..af977675fd33 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -356,6 +356,9 @@ struct pp_hwmgr_func { int (*asic_reset)(struct pp_hwmgr *hwmgr, enum SMU_ASIC_RESET_MODE mode); int (*smu_i2c_bus_access)(struct pp_hwmgr *hwmgr, bool aquire); int (*set_df_cstate)(struct pp_hwmgr *hwmgr, enum pp_df_cstate state); + int (*set_xgmi_pstate)(struct pp_hwmgr *hwmgr, uint32_t pstate); + int (*disable_power_features_for_compute_performance)(struct pp_hwmgr *hwmgr, + bool disable); }; struct pp_table_func { diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h index 886b9a21ebd8..a886f0644d24 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h @@ -159,7 +159,7 @@ //FIXME need updating // Debug Overrides Bitmask #define DPM_OVERRIDE_DISABLE_UCLK_PID 0x00000001 -#define DPM_OVERRIDE_ENABLE_VOLT_LINK_VCN_FCLK 0x00000002 +#define DPM_OVERRIDE_DISABLE_VOLT_LINK_VCN_FCLK 0x00000002 // I2C Config Bit Defines #define I2C_CONTROLLER_ENABLED 1 diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index abd4debb3def..719844257713 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -27,7 +27,7 @@ #define SMU11_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU11_DRIVER_IF_VERSION_VG20 0x13 -#define SMU11_DRIVER_IF_VERSION_ARCT 0x0F +#define SMU11_DRIVER_IF_VERSION_ARCT 0x10 #define SMU11_DRIVER_IF_VERSION_NV10 0x33 #define SMU11_DRIVER_IF_VERSION_NV14 0x34 @@ -48,6 +48,8 @@ #define SMU11_TOOL_SIZE 0x19000 +#define MAX_PCIE_CONF 2 + #define CLK_MAP(clk, index) \ [SMU_##clk] = {1, (index)} @@ -88,6 +90,11 @@ struct smu_11_0_dpm_table { uint32_t max; /* MHz */ }; +struct smu_11_0_pcie_table { + uint8_t pcie_gen[MAX_PCIE_CONF]; + uint8_t pcie_lane[MAX_PCIE_CONF]; +}; + struct smu_11_0_dpm_tables { struct smu_11_0_dpm_table soc_table; struct smu_11_0_dpm_table gfx_table; @@ -100,6 +107,7 @@ struct smu_11_0_dpm_tables { struct smu_11_0_dpm_table display_table; struct smu_11_0_dpm_table phy_table; struct smu_11_0_dpm_table fclk_table; + struct smu_11_0_pcie_table pcie_table; }; struct smu_11_0_dpm_context { @@ -169,10 +177,9 @@ int smu_v11_0_notify_memory_pool_location(struct smu_context *smu); int smu_v11_0_system_features_control(struct smu_context *smu, bool en); -int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg); - int -smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, +smu_v11_0_send_msg_with_param(struct smu_context *smu, + enum smu_message_type msg, uint32_t param); int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg); @@ -250,4 +257,8 @@ int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ int smu_v11_0_override_pcie_parameters(struct smu_context *smu); +int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, size_t overdrive_table_size); + +uint32_t smu_v11_0_get_max_power_limit(struct smu_context *smu); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h index 86cdc3393eac..b2f96a101124 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h @@ -141,7 +141,9 @@ struct smu_11_0_powerplay_table struct smu_11_0_power_saving_clock_table power_saving_clock; struct smu_11_0_overdrive_table overdrive_table; +#ifndef SMU_11_0_PARTIAL_PPTABLE PPTable_t smc_pptable; //PPTable_t in smu11_driver_if.h +#endif } __attribute__((packed)); #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h index 9b9f5df0911c..9d81d789c713 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h @@ -44,10 +44,9 @@ int smu_v12_0_read_arg(struct smu_context *smu, uint32_t *arg); int smu_v12_0_wait_for_response(struct smu_context *smu); -int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg); - int -smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, +smu_v12_0_send_msg_with_param(struct smu_context *smu, + enum smu_message_type msg, uint32_t param); int smu_v12_0_check_fw_status(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 769f9451d904..4a14fd1f9fd5 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -36,6 +36,7 @@ #include "navi10_ppt.h" #include "smu_v11_0_pptable.h" #include "smu_v11_0_ppsmc.h" +#include "nbio/nbio_7_4_sh_mask.h" #include "asic_reg/mp/mp_11_0_sh_mask.h" @@ -207,7 +208,7 @@ static struct smu_11_0_cmn2aisc_mapping navi10_workload_map[PP_SMC_POWER_PROFILE WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT), - WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT), + WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT), }; @@ -599,6 +600,7 @@ static int navi10_set_default_dpm_table(struct smu_context *smu) struct smu_table_context *table_context = &smu->smu_table; struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; PPTable_t *driver_ppt = NULL; + int i; driver_ppt = table_context->driver_pptable; @@ -629,6 +631,11 @@ static int navi10_set_default_dpm_table(struct smu_context *smu) dpm_context->dpm_tables.phy_table.min = driver_ppt->FreqTablePhyclk[0]; dpm_context->dpm_tables.phy_table.max = driver_ppt->FreqTablePhyclk[NUM_PHYCLK_DPM_LEVELS - 1]; + for (i = 0; i < MAX_PCIE_CONF; i++) { + dpm_context->dpm_tables.pcie_table.pcie_gen[i] = driver_ppt->PcieGenSpeed[i]; + dpm_context->dpm_tables.pcie_table.pcie_lane[i] = driver_ppt->PcieLaneCount[i]; + } + return 0; } @@ -691,13 +698,29 @@ static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu return dpm_desc->SnapToDiscrete == 0 ? true : false; } +static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_ID feature) +{ + return od_table->cap[feature]; +} + + static int navi10_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { + uint16_t *curve_settings; int i, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t freq_values[3] = {0}; uint32_t mark_index = 0; + struct smu_table_context *table_context = &smu->smu_table; + uint32_t gen_speed, lane_width; + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; + struct amdgpu_device *adev = smu->adev; + PPTable_t *pptable = (PPTable_t *)table_context->driver_pptable; + OverDriveTable_t *od_table = + (OverDriveTable_t *)table_context->overdrive_table; + struct smu_11_0_overdrive_table *od_settings = smu->od_settings; switch (clk_type) { case SMU_GFXCLK: @@ -748,6 +771,69 @@ static int navi10_print_clk_levels(struct smu_context *smu, } break; + case SMU_PCIE: + gen_speed = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) & + PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) + >> PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; + lane_width = (RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL) & + PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) + >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT; + for (i = 0; i < NUM_LINK_LEVELS; i++) + size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i, + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 0) ? "2.5GT/s," : + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 1) ? "5.0GT/s," : + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 2) ? "8.0GT/s," : + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 3) ? "16.0GT/s," : "", + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 1) ? "x1" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 2) ? "x2" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 3) ? "x4" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 4) ? "x8" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 5) ? "x12" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 6) ? "x16" : "", + pptable->LclkFreq[i], + (gen_speed == dpm_context->dpm_tables.pcie_table.pcie_gen[i]) && + (lane_width == dpm_context->dpm_tables.pcie_table.pcie_lane[i]) ? + "*" : ""); + break; + case SMU_OD_SCLK: + if (!smu->od_enabled || !od_table || !od_settings) + break; + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) + break; + size += sprintf(buf + size, "OD_SCLK:\n"); + size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax); + break; + case SMU_OD_MCLK: + if (!smu->od_enabled || !od_table || !od_settings) + break; + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) + break; + size += sprintf(buf + size, "OD_MCLK:\n"); + size += sprintf(buf + size, "0: %uMHz\n", od_table->UclkFmax); + break; + case SMU_OD_VDDC_CURVE: + if (!smu->od_enabled || !od_table || !od_settings) + break; + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) + break; + size += sprintf(buf + size, "OD_VDDC_CURVE:\n"); + for (i = 0; i < 3; i++) { + switch (i) { + case 0: + curve_settings = &od_table->GfxclkFreq1; + break; + case 1: + curve_settings = &od_table->GfxclkFreq2; + break; + case 2: + curve_settings = &od_table->GfxclkFreq3; + break; + default: + break; + } + size += sprintf(buf + size, "%d: %uMHz @ %umV\n", i, curve_settings[0], curve_settings[1] / NAVI10_VOLTAGE_SCALE); + } + break; default: break; } @@ -773,6 +859,12 @@ static int navi10_force_clk_levels(struct smu_context *smu, case SMU_UCLK: case SMU_DCEFCLK: case SMU_FCLK: + /* There is only 2 levels for fine grained DPM */ + if (navi10_is_support_fine_grained_dpm(smu, clk_type)) { + soft_max_level = (soft_max_level >= 1 ? 1 : 0); + soft_min_level = (soft_min_level >= 1 ? 1 : 0); + } + ret = smu_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq); if (ret) return size; @@ -1582,17 +1674,22 @@ static int navi10_display_disable_memory_clock_switch(struct smu_context *smu, return ret; } +static uint32_t navi10_get_pptable_power_limit(struct smu_context *smu) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + return pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; +} + static int navi10_get_power_limit(struct smu_context *smu, uint32_t *limit, - bool asic_default) + bool cap) { PPTable_t *pptable = smu->smu_table.driver_pptable; uint32_t asic_default_power_limit = 0; int ret = 0; int power_src; - if (!smu->default_power_limit || - !smu->power_limit) { + if (!smu->power_limit) { if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC); if (power_src < 0) @@ -1615,17 +1712,11 @@ static int navi10_get_power_limit(struct smu_context *smu, pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; } - if (smu->od_enabled) { - asic_default_power_limit *= (100 + smu->smu_table.TDPODLimit); - asic_default_power_limit /= 100; - } - - smu->default_power_limit = asic_default_power_limit; smu->power_limit = asic_default_power_limit; } - if (asic_default) - *limit = smu->default_power_limit; + if (cap) + *limit = smu_v11_0_get_max_power_limit(smu); else *limit = smu->power_limit; @@ -1640,6 +1731,9 @@ static int navi10_update_pcie_parameters(struct smu_context *smu, int ret, i; uint32_t smu_pcie_arg; + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; + for (i = 0; i < NUM_LINK_LEVELS; i++) { smu_pcie_arg = (i << 16) | ((pptable->PcieGenSpeed[i] <= pcie_gen_cap) ? (pptable->PcieGenSpeed[i] << 8) : @@ -1648,11 +1742,260 @@ static int navi10_update_pcie_parameters(struct smu_context *smu, ret = smu_send_smc_msg_with_param(smu, SMU_MSG_OverridePcieParameters, smu_pcie_arg); + + if (ret) + return ret; + + if (pptable->PcieGenSpeed[i] > pcie_gen_cap) + dpm_context->dpm_tables.pcie_table.pcie_gen[i] = pcie_gen_cap; + if (pptable->PcieLaneCount[i] > pcie_width_cap) + dpm_context->dpm_tables.pcie_table.pcie_lane[i] = pcie_width_cap; + } + + return 0; +} + +static inline void navi10_dump_od_table(OverDriveTable_t *od_table) { + pr_debug("OD: Gfxclk: (%d, %d)\n", od_table->GfxclkFmin, od_table->GfxclkFmax); + pr_debug("OD: Gfx1: (%d, %d)\n", od_table->GfxclkFreq1, od_table->GfxclkVolt1); + pr_debug("OD: Gfx2: (%d, %d)\n", od_table->GfxclkFreq2, od_table->GfxclkVolt2); + pr_debug("OD: Gfx3: (%d, %d)\n", od_table->GfxclkFreq3, od_table->GfxclkVolt3); + pr_debug("OD: UclkFmax: %d\n", od_table->UclkFmax); + pr_debug("OD: OverDrivePct: %d\n", od_table->OverDrivePct); +} + +static int navi10_od_setting_check_range(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODSETTING_ID setting, uint32_t value) +{ + if (value < od_table->min[setting]) { + pr_warn("OD setting (%d, %d) is less than the minimum allowed (%d)\n", setting, value, od_table->min[setting]); + return -EINVAL; + } + if (value > od_table->max[setting]) { + pr_warn("OD setting (%d, %d) is greater than the maximum allowed (%d)\n", setting, value, od_table->max[setting]); + return -EINVAL; + } + return 0; +} + +static int navi10_setup_od_limits(struct smu_context *smu) { + struct smu_11_0_overdrive_table *overdrive_table = NULL; + struct smu_11_0_powerplay_table *powerplay_table = NULL; + + if (!smu->smu_table.power_play_table) { + pr_err("powerplay table uninitialized!\n"); + return -ENOENT; + } + powerplay_table = (struct smu_11_0_powerplay_table *)smu->smu_table.power_play_table; + overdrive_table = &powerplay_table->overdrive_table; + if (!smu->od_settings) { + smu->od_settings = kmemdup(overdrive_table, sizeof(struct smu_11_0_overdrive_table), GFP_KERNEL); + } else { + memcpy(smu->od_settings, overdrive_table, sizeof(struct smu_11_0_overdrive_table)); + } + return 0; +} + +static int navi10_set_default_od_settings(struct smu_context *smu, bool initialize) { + OverDriveTable_t *od_table; + int ret = 0; + + ret = smu_v11_0_set_default_od_settings(smu, initialize, sizeof(OverDriveTable_t)); + if (ret) + return ret; + + if (initialize) { + ret = navi10_setup_od_limits(smu); + if (ret) { + pr_err("Failed to retrieve board OD limits\n"); + return ret; + } + } + od_table = (OverDriveTable_t *)smu->smu_table.overdrive_table; + if (od_table) { + navi10_dump_od_table(od_table); + } + + return ret; +} + +static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABLE_COMMAND type, long input[], uint32_t size) { + int i; + int ret = 0; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTable_t *od_table; + struct smu_11_0_overdrive_table *od_settings; + enum SMU_11_0_ODSETTING_ID freq_setting, voltage_setting; + uint16_t *freq_ptr, *voltage_ptr; + od_table = (OverDriveTable_t *)table_context->overdrive_table; + + if (!smu->od_enabled) { + pr_warn("OverDrive is not enabled!\n"); + return -EINVAL; + } + + if (!smu->od_settings) { + pr_err("OD board limits are not set!\n"); + return -ENOENT; + } + + od_settings = smu->od_settings; + + switch (type) { + case PP_OD_EDIT_SCLK_VDDC_TABLE: + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) { + pr_warn("GFXCLK_LIMITS not supported!\n"); + return -ENOTSUPP; + } + if (!table_context->overdrive_table) { + pr_err("Overdrive is not initialized\n"); + return -EINVAL; + } + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + pr_info("invalid number of input parameters %d\n", size); + return -EINVAL; + } + switch (input[i]) { + case 0: + freq_setting = SMU_11_0_ODSETTING_GFXCLKFMIN; + freq_ptr = &od_table->GfxclkFmin; + if (input[i + 1] > od_table->GfxclkFmax) { + pr_info("GfxclkFmin (%ld) must be <= GfxclkFmax (%u)!\n", + input[i + 1], + od_table->GfxclkFmin); + return -EINVAL; + } + break; + case 1: + freq_setting = SMU_11_0_ODSETTING_GFXCLKFMAX; + freq_ptr = &od_table->GfxclkFmax; + if (input[i + 1] < od_table->GfxclkFmin) { + pr_info("GfxclkFmax (%ld) must be >= GfxclkFmin (%u)!\n", + input[i + 1], + od_table->GfxclkFmax); + return -EINVAL; + } + break; + default: + pr_info("Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]); + pr_info("Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + ret = navi10_od_setting_check_range(od_settings, freq_setting, input[i + 1]); + if (ret) + return ret; + *freq_ptr = input[i + 1]; + } + break; + case PP_OD_EDIT_MCLK_VDDC_TABLE: + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) { + pr_warn("UCLK_MAX not supported!\n"); + return -ENOTSUPP; + } + if (size < 2) { + pr_info("invalid number of parameters: %d\n", size); + return -EINVAL; + } + if (input[0] != 1) { + pr_info("Invalid MCLK_VDDC_TABLE index: %ld\n", input[0]); + pr_info("Supported indices: [1:max]\n"); + return -EINVAL; + } + ret = navi10_od_setting_check_range(od_settings, SMU_11_0_ODSETTING_UCLKFMAX, input[1]); + if (ret) + return ret; + od_table->UclkFmax = input[1]; + break; + case PP_OD_COMMIT_DPM_TABLE: + navi10_dump_od_table(od_table); + ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)od_table, true); + if (ret) { + pr_err("Failed to import overdrive table!\n"); + return ret; + } + // no lock needed because smu_od_edit_dpm_table has it + ret = smu_handle_task(smu, smu->smu_dpm.dpm_level, + AMD_PP_TASK_READJUST_POWER_STATE, + false); + if (ret) { + return ret; + } + break; + case PP_OD_EDIT_VDDC_CURVE: + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) { + pr_warn("GFXCLK_CURVE not supported!\n"); + return -ENOTSUPP; + } + if (size < 3) { + pr_info("invalid number of parameters: %d\n", size); + return -EINVAL; + } + if (!od_table) { + pr_info("Overdrive is not initialized\n"); + return -EINVAL; + } + + switch (input[0]) { + case 0: + freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1; + voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P1; + freq_ptr = &od_table->GfxclkFreq1; + voltage_ptr = &od_table->GfxclkVolt1; + break; + case 1: + freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P2; + voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P2; + freq_ptr = &od_table->GfxclkFreq2; + voltage_ptr = &od_table->GfxclkVolt2; + break; + case 2: + freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P3; + voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P3; + freq_ptr = &od_table->GfxclkFreq3; + voltage_ptr = &od_table->GfxclkVolt3; + break; + default: + pr_info("Invalid VDDC_CURVE index: %ld\n", input[0]); + pr_info("Supported indices: [0, 1, 2]\n"); + return -EINVAL; + } + ret = navi10_od_setting_check_range(od_settings, freq_setting, input[1]); + if (ret) + return ret; + // Allow setting zero to disable the OverDrive VDDC curve + if (input[2] != 0) { + ret = navi10_od_setting_check_range(od_settings, voltage_setting, input[2]); + if (ret) + return ret; + *freq_ptr = input[1]; + *voltage_ptr = ((uint16_t)input[2]) * NAVI10_VOLTAGE_SCALE; + pr_debug("OD: set curve %ld: (%d, %d)\n", input[0], *freq_ptr, *voltage_ptr); + } else { + // If setting 0, disable all voltage curve settings + od_table->GfxclkVolt1 = 0; + od_table->GfxclkVolt2 = 0; + od_table->GfxclkVolt3 = 0; + } + navi10_dump_od_table(od_table); + break; + default: + return -ENOSYS; + } return ret; } +static int navi10_run_btc(struct smu_context *smu) +{ + int ret = 0; + + ret = smu_send_smc_msg(smu, SMU_MSG_RunBtc); + if (ret) + pr_err("RunBtc failed!\n"); + + return ret; +} static const struct pptable_funcs navi10_ppt_funcs = { .tables_init = navi10_tables_init, @@ -1712,7 +2055,6 @@ static const struct pptable_funcs navi10_ppt_funcs = { .set_tool_table_location = smu_v11_0_set_tool_table_location, .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, .system_features_control = smu_v11_0_system_features_control, - .send_smc_msg = smu_v11_0_send_msg, .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, .read_smc_arg = smu_v11_0_read_arg, .init_display_count = smu_v11_0_init_display_count, @@ -1742,6 +2084,10 @@ static const struct pptable_funcs navi10_ppt_funcs = { .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .override_pcie_parameters = smu_v11_0_override_pcie_parameters, + .set_default_od_settings = navi10_set_default_od_settings, + .od_edit_dpm_table = navi10_od_edit_dpm_table, + .get_pptable_power_limit = navi10_get_pptable_power_limit, + .run_btc = navi10_run_btc, }; void navi10_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h index a37e37c5f105..ec03c7992f6d 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h @@ -33,6 +33,11 @@ #define NAVI14_UMD_PSTATE_PEAK_XTX_GFXCLK (1717) #define NAVI14_UMD_PSTATE_PEAK_XL_GFXCLK (1448) +#define NAVI10_VOLTAGE_SCALE (4) + +#define smnPCIE_LC_SPEED_CNTL 0x11140290 +#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288 + extern void navi10_set_ppt_funcs(struct smu_context *smu); #endif diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index 4a9751971a9d..977bdd962e98 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -180,11 +180,13 @@ static int renoir_print_clk_levels(struct smu_context *smu, int i, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0; DpmClocks_t *clk_table = smu->smu_table.clocks_table; - SmuMetrics_t metrics = {0}; + SmuMetrics_t metrics; if (!clk_table || clk_type >= SMU_CLK_COUNT) return -EINVAL; + memset(&metrics, 0, sizeof(metrics)); + ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, (void *)&metrics, false); if (ret) @@ -695,7 +697,6 @@ static const struct pptable_funcs renoir_ppt_funcs = { .check_fw_version = smu_v12_0_check_fw_version, .powergate_sdma = smu_v12_0_powergate_sdma, .powergate_vcn = smu_v12_0_powergate_vcn, - .send_smc_msg = smu_v12_0_send_msg, .send_smc_msg_with_param = smu_v12_0_send_msg_with_param, .read_smc_arg = smu_v12_0_read_arg, .set_gfx_cgpg = smu_v12_0_set_gfx_cgpg, diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h index 8bcda7871309..8872f8b2d502 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_internal.h +++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h @@ -75,8 +75,8 @@ #define smu_set_default_od_settings(smu, initialize) \ ((smu)->ppt_funcs->set_default_od_settings ? (smu)->ppt_funcs->set_default_od_settings((smu), (initialize)) : 0) -#define smu_send_smc_msg(smu, msg) \ - ((smu)->ppt_funcs->send_smc_msg? (smu)->ppt_funcs->send_smc_msg((smu), (msg)) : 0) +int smu_send_smc_msg(struct smu_context *smu, enum smu_message_type msg); + #define smu_send_smc_msg_with_param(smu, msg, param) \ ((smu)->ppt_funcs->send_smc_msg_with_param? (smu)->ppt_funcs->send_smc_msg_with_param((smu), (msg), (param)) : 0) #define smu_read_smc_arg(smu, arg) \ diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index bbb74b1d5d80..e4268a627eff 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -24,6 +24,8 @@ #include <linux/module.h> #include <linux/pci.h> +#define SMU_11_0_PARTIAL_PPTABLE + #include "pp_debug.h" #include "amdgpu.h" #include "amdgpu_smu.h" @@ -31,6 +33,7 @@ #include "atomfirmware.h" #include "amdgpu_atomfirmware.h" #include "smu_v11_0.h" +#include "smu_v11_0_pptable.h" #include "soc15_common.h" #include "atom.h" #include "amd_pcie.h" @@ -87,36 +90,11 @@ static int smu_v11_0_wait_for_response(struct smu_context *smu) return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO; } -int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0, index = 0; - - index = smu_msg_get_index(smu, msg); - if (index < 0) - return index; - - smu_v11_0_wait_for_response(smu); - - WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); - - smu_v11_0_send_msg_without_waiting(smu, (uint16_t)index); - - ret = smu_v11_0_wait_for_response(smu); - - if (ret) - pr_err("failed send message: %10s (%d) response %#x\n", - smu_get_message_name(smu, msg), index, ret); - - return ret; - -} - int -smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, +smu_v11_0_send_msg_with_param(struct smu_context *smu, + enum smu_message_type msg, uint32_t param) { - struct amdgpu_device *adev = smu->adev; int ret = 0, index = 0; @@ -368,6 +346,7 @@ int smu_v11_0_setup_pptable(struct smu_context *smu) version_major = le16_to_cpu(hdr->header.header_version_major); version_minor = le16_to_cpu(hdr->header.header_version_minor); if (version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) { + pr_info("use driver provided pptable %d\n", smu->smu_table.boot_values.pp_table_id); switch (version_minor) { case 0: ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size); @@ -384,6 +363,7 @@ int smu_v11_0_setup_pptable(struct smu_context *smu) return ret; } else { + pr_info("use vbios provided pptable\n"); index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, powerplayinfo); @@ -1043,13 +1023,44 @@ int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu) return 0; } +uint32_t smu_v11_0_get_max_power_limit(struct smu_context *smu) { + uint32_t od_limit, max_power_limit; + struct smu_11_0_powerplay_table *powerplay_table = NULL; + struct smu_table_context *table_context = &smu->smu_table; + powerplay_table = table_context->power_play_table; + + max_power_limit = smu_get_pptable_power_limit(smu); + + if (!max_power_limit) { + // If we couldn't get the table limit, fall back on first-read value + if (!smu->default_power_limit) + smu->default_power_limit = smu->power_limit; + max_power_limit = smu->default_power_limit; + } + + if (smu->od_enabled) { + od_limit = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); + + pr_debug("ODSETTING_POWERPERCENTAGE: %d (default: %d)\n", od_limit, smu->default_power_limit); + + max_power_limit *= (100 + od_limit); + max_power_limit /= 100; + } + + return max_power_limit; +} + int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n) { int ret = 0; + uint32_t max_power_limit; - if (n > smu->default_power_limit) { - pr_err("New power limit is over the max allowed %d\n", - smu->default_power_limit); + max_power_limit = smu_v11_0_get_max_power_limit(smu); + + if (n > max_power_limit) { + pr_err("New power limit (%d) is over the max allowed %d\n", + n, + max_power_limit); return -EINVAL; } @@ -1463,16 +1474,13 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, return ret; } -#define XGMI_STATE_D0 1 -#define XGMI_STATE_D3 0 - int smu_v11_0_set_xgmi_pstate(struct smu_context *smu, uint32_t pstate) { int ret = 0; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetXgmiMode, - pstate ? XGMI_STATE_D0 : XGMI_STATE_D3); + pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3); return ret; } @@ -1780,3 +1788,30 @@ int smu_v11_0_override_pcie_parameters(struct smu_context *smu) return ret; } + +int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, size_t overdrive_table_size) +{ + struct smu_table_context *table_context = &smu->smu_table; + int ret = 0; + + if (initialize) { + if (table_context->overdrive_table) { + return -EINVAL; + } + table_context->overdrive_table = kzalloc(overdrive_table_size, GFP_KERNEL); + if (!table_context->overdrive_table) { + return -ENOMEM; + } + ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, false); + if (ret) { + pr_err("Failed to export overdrive table!\n"); + return ret; + } + } + ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, true); + if (ret) { + pr_err("Failed to import overdrive table!\n"); + return ret; + } + return ret; +} diff --git a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c index 139dd737eaa5..094cfc46adac 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c @@ -77,33 +77,9 @@ int smu_v12_0_wait_for_response(struct smu_context *smu) return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO; } -int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0, index = 0; - - index = smu_msg_get_index(smu, msg); - if (index < 0) - return index; - - smu_v12_0_wait_for_response(smu); - - WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); - - smu_v12_0_send_msg_without_waiting(smu, (uint16_t)index); - - ret = smu_v12_0_wait_for_response(smu); - - if (ret) - pr_err("Failed to send message 0x%x, response 0x%x\n", index, - ret); - - return ret; - -} - int -smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, +smu_v12_0_send_msg_with_param(struct smu_context *smu, + enum smu_message_type msg, uint32_t param) { struct amdgpu_device *adev = smu->adev; diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 7c8933f987d1..60b9ff097142 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -221,7 +221,7 @@ static struct smu_11_0_cmn2aisc_mapping vega20_workload_map[PP_SMC_POWER_PROFILE WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT), - WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT), + WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT), }; @@ -466,7 +466,6 @@ static int vega20_store_powerplay_table(struct smu_context *smu) sizeof(PPTable_t)); table_context->thermal_controller_type = powerplay_table->ucThermalControllerType; - table_context->TDPODLimit = le32_to_cpu(powerplay_table->OverDrive8Table.ODSettingsMax[ATOM_VEGA20_ODSETTING_POWERPERCENTAGE]); return 0; } @@ -3232,7 +3231,6 @@ static const struct pptable_funcs vega20_ppt_funcs = { .set_tool_table_location = smu_v11_0_set_tool_table_location, .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, .system_features_control = smu_v11_0_system_features_control, - .send_smc_msg = smu_v11_0_send_msg, .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, .read_smc_arg = smu_v11_0_read_arg, .init_display_count = smu_v11_0_init_display_count, diff --git a/drivers/gpu/drm/arc/arcpgu_drv.c b/drivers/gpu/drm/arc/arcpgu_drv.c index 6b7f791685ec..d6a6692db0ac 100644 --- a/drivers/gpu/drm/arc/arcpgu_drv.c +++ b/drivers/gpu/drm/arc/arcpgu_drv.c @@ -14,6 +14,7 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> +#include <drm/drm_of.h> #include <drm/drm_probe_helper.h> #include <linux/dma-mapping.h> #include <linux/module.h> @@ -45,7 +46,7 @@ static int arcpgu_load(struct drm_device *drm) { struct platform_device *pdev = to_platform_device(drm->dev); struct arcpgu_drm_private *arcpgu; - struct device_node *encoder_node; + struct device_node *encoder_node = NULL, *endpoint_node = NULL; struct resource *res; int ret; @@ -80,14 +81,23 @@ static int arcpgu_load(struct drm_device *drm) if (arc_pgu_setup_crtc(drm) < 0) return -ENODEV; - /* find the encoder node and initialize it */ - encoder_node = of_parse_phandle(drm->dev->of_node, "encoder-slave", 0); + /* + * There is only one output port inside each device. It is linked with + * encoder endpoint. + */ + endpoint_node = of_graph_get_next_endpoint(pdev->dev.of_node, NULL); + if (endpoint_node) { + encoder_node = of_graph_get_remote_port_parent(endpoint_node); + of_node_put(endpoint_node); + } + if (encoder_node) { ret = arcpgu_drm_hdmi_init(drm, encoder_node); of_node_put(encoder_node); if (ret < 0) return ret; } else { + dev_info(drm->dev, "no encoder found. Assumed virtual LCD on simulation platform\n"); ret = arcpgu_drm_sim_init(drm, NULL); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index d49772de93e0..52648b4008bc 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -84,7 +84,8 @@ static void komeda_kms_commit_tail(struct drm_atomic_state *old_state) drm_atomic_helper_commit_modeset_disables(dev, old_state); - drm_atomic_helper_commit_planes(dev, old_state, 0); + drm_atomic_helper_commit_planes(dev, old_state, + DRM_PLANE_COMMIT_ACTIVE_ONLY); drm_atomic_helper_commit_modeset_enables(dev, old_state); diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c index 42bdc63dcffa..52750116aa19 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c @@ -581,8 +581,8 @@ komeda_splitter_validate(struct komeda_splitter *splitter, } if (!in_range(&splitter->vsize, dflow->in_h)) { - DRM_DEBUG_ATOMIC("split in_in: %d exceed the acceptable range.\n", - dflow->in_w); + DRM_DEBUG_ATOMIC("split in_h: %d exceeds the acceptable range.\n", + dflow->in_h); return -EINVAL; } diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 587052751b48..b191d39c071d 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1571,8 +1571,11 @@ static void commit_tail(struct drm_atomic_state *old_state) { struct drm_device *dev = old_state->dev; const struct drm_mode_config_helper_funcs *funcs; + struct drm_crtc_state *new_crtc_state; + struct drm_crtc *crtc; ktime_t start; s64 commit_time_ms; + unsigned int i, new_self_refresh_mask = 0; funcs = dev->mode_config.helper_private; @@ -1592,6 +1595,15 @@ static void commit_tail(struct drm_atomic_state *old_state) drm_atomic_helper_wait_for_dependencies(old_state); + /* + * We cannot safely access new_crtc_state after + * drm_atomic_helper_commit_hw_done() so figure out which crtc's have + * self-refresh active beforehand: + */ + for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i) + if (new_crtc_state->self_refresh_active) + new_self_refresh_mask |= BIT(i); + if (funcs && funcs->atomic_commit_tail) funcs->atomic_commit_tail(old_state); else @@ -1600,7 +1612,8 @@ static void commit_tail(struct drm_atomic_state *old_state) commit_time_ms = ktime_ms_delta(ktime_get(), start); if (commit_time_ms > 0) drm_self_refresh_helper_update_avg_times(old_state, - (unsigned long)commit_time_ms); + (unsigned long)commit_time_ms, + new_self_refresh_mask); drm_atomic_helper_commit_cleanup_done(old_state); diff --git a/drivers/gpu/drm/drm_self_refresh_helper.c b/drivers/gpu/drm/drm_self_refresh_helper.c index 68f4765a5896..dd33fec5aabd 100644 --- a/drivers/gpu/drm/drm_self_refresh_helper.c +++ b/drivers/gpu/drm/drm_self_refresh_helper.c @@ -133,29 +133,33 @@ out_drop_locks: * drm_self_refresh_helper_update_avg_times - Updates a crtc's SR time averages * @state: the state which has just been applied to hardware * @commit_time_ms: the amount of time in ms that this commit took to complete + * @new_self_refresh_mask: bitmask of crtc's that have self_refresh_active in + * new state * * Called after &drm_mode_config_funcs.atomic_commit_tail, this function will * update the average entry/exit self refresh times on self refresh transitions. * These averages will be used when calculating how long to delay before * entering self refresh mode after activity. */ -void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state, - unsigned int commit_time_ms) +void +drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state, + unsigned int commit_time_ms, + unsigned int new_self_refresh_mask) { struct drm_crtc *crtc; - struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct drm_crtc_state *old_crtc_state; int i; - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, - new_crtc_state, i) { + for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) { + bool new_self_refresh_active = new_self_refresh_mask & BIT(i); struct drm_self_refresh_data *sr_data = crtc->self_refresh_data; struct ewma_psr_time *time; if (old_crtc_state->self_refresh_active == - new_crtc_state->self_refresh_active) + new_self_refresh_active) continue; - if (new_crtc_state->self_refresh_active) + if (new_self_refresh_active) time = &sr_data->entry_avg_ms; else time = &sr_data->exit_avg_ms; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index 698db540972c..648cf0207309 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -180,6 +180,8 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit) etnaviv_cmdbuf_get_va(&submit->cmdbuf, &gpu->mmu_context->cmdbuf_mapping)); + mutex_unlock(&gpu->mmu_context->lock); + /* Reserve space for the bomap */ if (n_bomap_pages) { bomap_start = bomap = iter.data; @@ -221,8 +223,6 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit) obj->base.size); } - mutex_unlock(&gpu->mmu_context->lock); - etnaviv_core_dump_header(&iter, ETDUMP_BUF_END, iter.data); dev_coredumpv(gpu->dev, iter.start, iter.data - iter.start, GFP_KERNEL); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c index 043111a1d60c..f8bf488e9d71 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c @@ -155,9 +155,11 @@ static void etnaviv_iommuv2_dump(struct etnaviv_iommu_context *context, void *bu memcpy(buf, v2_context->mtlb_cpu, SZ_4K); buf += SZ_4K; - for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++, buf += SZ_4K) - if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT) + for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) + if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT) { memcpy(buf, v2_context->stlb_cpu[i], SZ_4K); + buf += SZ_4K; + } } static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index 35ebae6a1be7..3607d348c298 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -328,12 +328,23 @@ etnaviv_iommu_context_init(struct etnaviv_iommu_global *global, ret = etnaviv_cmdbuf_suballoc_map(suballoc, ctx, &ctx->cmdbuf_mapping, global->memory_base); - if (ret) { - global->ops->free(ctx); - return NULL; + if (ret) + goto out_free; + + if (global->version == ETNAVIV_IOMMU_V1 && + ctx->cmdbuf_mapping.iova > 0x80000000) { + dev_err(global->dev, + "command buffer outside valid memory window\n"); + goto out_unmap; } return ctx; + +out_unmap: + etnaviv_cmdbuf_suballoc_unmap(ctx, &ctx->cmdbuf_mapping); +out_free: + global->ops->free(ctx); + return NULL; } void etnaviv_iommu_restore(struct etnaviv_gpu *gpu, diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 1799537a3228..c280b6ae38eb 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -25,7 +25,7 @@ config DRM_I915_HEARTBEAT_INTERVAL config DRM_I915_PREEMPT_TIMEOUT int "Preempt timeout (ms, jiffy granularity)" - default 100 # milliseconds + default 640 # milliseconds help How long to wait (in milliseconds) for a preemption event to occur when submitting a new context via execlists. If the current context diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 9cd6d2348a1e..c2875b10adf9 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -200,6 +200,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->update_wm_pre = false; crtc_state->update_wm_post = false; crtc_state->fifo_changed = false; + crtc_state->preload_luts = false; crtc_state->wm.need_postvbl_update = false; crtc_state->fb_bits = 0; crtc_state->update_planes = 0; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 0caef2592a7e..ed8c7ce62119 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1273,7 +1273,9 @@ static u8 icl_calc_voltage_level(int cdclk) static u8 ehl_calc_voltage_level(int cdclk) { - if (cdclk > 312000) + if (cdclk > 326400) + return 3; + else if (cdclk > 312000) return 2; else if (cdclk > 180000) return 1; diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index fa44eb73d088..aa3a063549c3 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1022,6 +1022,55 @@ void intel_color_commit(const struct intel_crtc_state *crtc_state) dev_priv->display.color_commit(crtc_state); } +static bool intel_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + return !old_crtc_state->base.gamma_lut && + !old_crtc_state->base.degamma_lut; +} + +static bool chv_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + /* + * CGM_PIPE_MODE is itself single buffered. We'd have to + * somehow split it out from chv_load_luts() if we wanted + * the ability to preload the CGM LUTs/CSC without tearing. + */ + if (old_crtc_state->cgm_mode || new_crtc_state->cgm_mode) + return false; + + return !old_crtc_state->base.gamma_lut; +} + +static bool glk_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + /* + * The hardware degamma is active whenever the pipe + * CSC is active. Thus even if the old state has no + * software degamma we need to avoid clobbering the + * linear hardware degamma mid scanout. + */ + return !old_crtc_state->csc_enable && + !old_crtc_state->base.gamma_lut; +} + int intel_color_check(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); @@ -1165,6 +1214,8 @@ static int i9xx_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1217,6 +1268,8 @@ static int chv_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = chv_can_preload_luts(crtc_state); + return 0; } @@ -1271,6 +1324,8 @@ static int ilk_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1328,6 +1383,8 @@ static int ivb_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1366,6 +1423,8 @@ static int glk_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = glk_can_preload_luts(crtc_state); + return 0; } @@ -1415,6 +1474,8 @@ static int icl_color_check(struct intel_crtc_state *crtc_state) crtc_state->csc_mode = icl_csc_mode(crtc_state); + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index b51f244ad7a5..c7c2b349858d 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -593,7 +593,7 @@ struct tgl_dkl_phy_ddi_buf_trans { u32 dkl_de_emphasis_control; }; -static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = { +static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = { /* VS pre-emp Non-trans mV Pre-emph dB */ { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ { 0x5, 0x0, 0x03 }, /* 0 1 400mV 3.5 dB */ @@ -607,6 +607,20 @@ static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = { { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */ }; +static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_hdmi_ddi_trans[] = { + /* HDMI Preset VS Pre-emph */ + { 0x7, 0x0, 0x0 }, /* 1 400mV 0dB */ + { 0x6, 0x0, 0x0 }, /* 2 500mV 0dB */ + { 0x4, 0x0, 0x0 }, /* 3 650mV 0dB */ + { 0x2, 0x0, 0x0 }, /* 4 800mV 0dB */ + { 0x0, 0x0, 0x0 }, /* 5 1000mV 0dB */ + { 0x0, 0x0, 0x5 }, /* 6 Full -1.5 dB */ + { 0x0, 0x0, 0x6 }, /* 7 Full -1.8 dB */ + { 0x0, 0x0, 0x7 }, /* 8 Full -2 dB */ + { 0x0, 0x0, 0x8 }, /* 9 Full -2.5 dB */ + { 0x0, 0x0, 0xA }, /* 10 Full -3 dB */ +}; + static const struct ddi_buf_trans * bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { @@ -898,7 +912,7 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, 0, &n_entries); else - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); default_entry = n_entries - 1; } else if (INTEL_GEN(dev_priv) == 11) { if (intel_phy_is_combo(dev_priv, phy)) @@ -1794,10 +1808,8 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, * of Color Encoding Format and Content Color Gamut] while sending * YCBCR 420, HDR BT.2020 signals we should program MSA MISC1 fields * which indicate VSC SDP for the Pixel Encoding/Colorimetry Format. - * - * FIXME MST doesn't pass in the conn_state */ - if (conn_state && intel_dp_needs_vsc_sdp(crtc_state, conn_state)) + if (intel_dp_needs_vsc_sdp(crtc_state, conn_state)) temp |= DP_MSA_MISC_COLOR_VSC_SDP; I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); @@ -2373,7 +2385,7 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) icl_get_combo_buf_trans(dev_priv, encoder->type, intel_dp->link_rate, &n_entries); else - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); } else if (INTEL_GEN(dev_priv) == 11) { if (intel_phy_is_combo(dev_priv, phy)) icl_get_combo_buf_trans(dev_priv, encoder->type, @@ -2825,8 +2837,13 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); - ddi_translations = tgl_dkl_phy_ddi_translations; + if (encoder->type == INTEL_OUTPUT_HDMI) { + n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); + ddi_translations = tgl_dkl_phy_hdmi_ddi_trans; + } else { + n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); + ddi_translations = tgl_dkl_phy_dp_ddi_trans; + } if (level >= n_entries) level = n_entries - 1; @@ -3605,7 +3622,11 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, else hsw_ddi_pre_enable_dp(encoder, crtc_state, conn_state); - intel_ddi_set_dp_msa(crtc_state, conn_state); + /* MST will call a setting of MSA after an allocating of Virtual Channel + * from MST encoder pre_enable callback. + */ + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) + intel_ddi_set_dp_msa(crtc_state, conn_state); } static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 348ce0456696..6f5e3bd13ad1 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -66,6 +66,7 @@ #include "intel_cdclk.h" #include "intel_color.h" #include "intel_display_types.h" +#include "intel_dp_link_training.h" #include "intel_fbc.h" #include "intel_fbdev.h" #include "intel_fifo_underrun.h" @@ -2528,6 +2529,9 @@ u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, * the highest stride limits of them all. */ crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A); + if (!crtc) + return 0; + plane = to_intel_plane(crtc->base.primary); return plane->max_stride(plane, pixel_format, modifier, @@ -14201,6 +14205,11 @@ static void intel_update_crtc(struct intel_crtc *crtc, /* vblanks work again, re-enable pipe CRC. */ intel_crtc_enable_pipe_crc(crtc); } else { + if (new_crtc_state->preload_luts && + (new_crtc_state->base.color_mgmt_changed || + new_crtc_state->update_pipe)) + intel_color_load_luts(new_crtc_state); + intel_pre_plane_update(old_crtc_state, new_crtc_state); if (new_crtc_state->update_pipe) @@ -14713,6 +14722,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { if (new_crtc_state->base.active && !needs_modeset(new_crtc_state) && + !new_crtc_state->preload_luts && (new_crtc_state->base.color_mgmt_changed || new_crtc_state->update_pipe)) intel_color_load_luts(new_crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 355c50088589..f417e0948001 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -27,7 +27,6 @@ #include <drm/drm_util.h> #include <drm/i915_drm.h> -#include "intel_dp_link_training.h" enum link_m_n_set; struct dpll; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 4341bd66a418..1a7334dbe802 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -775,6 +775,7 @@ struct intel_crtc_state { bool disable_cxsr; bool update_wm_pre, update_wm_post; /* watermarks are updated */ bool fifo_changed; /* FIFO split is changed */ + bool preload_luts; /* Pipe source size (ie. panel fitter input size) * All planes will be positioned inside this space, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 9ae5b8b6bbbc..03d1cba0b696 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -331,6 +331,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); intel_ddi_enable_pipe_clock(pipe_config); + + intel_ddi_set_dp_msa(pipe_config, conn_state); } static void intel_mst_enable_dp(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 3d1061470e76..48c960ca12fb 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -234,6 +234,11 @@ static int intelfb_create(struct drm_fb_helper *helper, info->apertures->ranges[0].base = ggtt->gmadr.start; info->apertures->ranges[0].size = ggtt->mappable_end; + /* Our framebuffer is the entirety of fbdev's system memory */ + info->fix.smem_start = + (unsigned long)(ggtt->gmadr.start + vma->node.start); + info->fix.smem_len = vma->node.size; + vaddr = i915_vma_pin_iomap(vma); if (IS_ERR(vaddr)) { DRM_ERROR("Failed to remap framebuffer into virtual memory\n"); @@ -243,10 +248,6 @@ static int intelfb_create(struct drm_fb_helper *helper, info->screen_base = vaddr; info->screen_size = vma->node.size; - /* Our framebuffer is the entirety of fbdev's system memory */ - info->fix.smem_start = (unsigned long)info->screen_base; - info->fix.smem_len = info->screen_size; - drm_fb_helper_fill_info(info, &ifbdev->helper, sizes); /* If the object is shmemfs backed, it will have given us zeroed pages. diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index edc41fc40726..72fda0430062 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -2885,7 +2885,7 @@ struct intel_plane * skl_universal_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id) { - static const struct drm_plane_funcs *plane_funcs; + const struct drm_plane_funcs *plane_funcs; struct intel_plane *plane; enum drm_plane_type plane_type; unsigned int supported_rotations; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index de6e55af82cf..255ab040022e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -236,6 +236,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); + kfree(ctx->jump_whitelist); + if (ctx->timeline) intel_timeline_put(ctx->timeline); @@ -527,6 +529,9 @@ __create_context(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + ctx->jump_whitelist = NULL; + ctx->jump_whitelist_cmds = 0; + spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); @@ -722,6 +727,7 @@ int i915_gem_init_contexts(struct drm_i915_private *i915) void i915_gem_driver_release__contexts(struct drm_i915_private *i915) { destroy_kernel_context(&i915->kernel_context); + flush_work(&i915->gem.contexts.free_work); } static int context_idr_cleanup(int id, void *p, void *data) @@ -1136,7 +1142,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (i915_gem_context_is_closed(ctx)) { err = -ENOENT; - goto out; + goto unlock; } if (vm == rcu_access_pointer(ctx->vm)) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 861d7d92fe9f..3870dd5daaa0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -176,6 +176,13 @@ struct i915_gem_context { * per vm, which may be one per context or shared with the global GTT) */ struct radix_tree_root handles_vma; + + /** jump_whitelist: Bit array for tracking cmds during cmdparsing + * Guarded by struct_mutex + */ + unsigned long *jump_whitelist; + /** jump_whitelist_cmds: No of cmd slots available */ + u32 jump_whitelist_cmds; }; #endif /* __I915_GEM_CONTEXT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index e4f5c269150a..f0998f1225af 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -298,7 +298,9 @@ static inline u64 gen8_noncanonical_addr(u64 address) static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { - return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; + return intel_engine_requires_cmd_parser(eb->engine) || + (intel_engine_using_cmd_parser(eb->engine) && + eb->args->batch_len); } static int eb_create(struct i915_execbuffer *eb) @@ -1990,40 +1992,94 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) return 0; } -static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) +static struct i915_vma * +shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = eb->i915; + struct i915_vma * const vma = *eb->vma; + struct i915_address_space *vm; + u64 flags; + + /* + * PPGTT backed shadow buffers must be mapped RO, to prevent + * post-scan tampering + */ + if (CMDPARSER_USES_GGTT(dev_priv)) { + flags = PIN_GLOBAL; + vm = &dev_priv->ggtt.vm; + } else if (vma->vm->has_read_only) { + flags = PIN_USER; + vm = vma->vm; + i915_gem_object_set_readonly(obj); + } else { + DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); + return ERR_PTR(-EINVAL); + } + + return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags); +} + +static struct i915_vma *eb_parse(struct i915_execbuffer *eb) { struct intel_engine_pool_node *pool; struct i915_vma *vma; + u64 batch_start; + u64 shadow_batch_start; int err; pool = intel_engine_get_pool(eb->engine, eb->batch_len); if (IS_ERR(pool)) return ERR_CAST(pool); - err = intel_engine_cmd_parser(eb->engine, + vma = shadow_batch_pin(eb, pool->obj); + if (IS_ERR(vma)) + goto err; + + batch_start = gen8_canonical_addr(eb->batch->node.start) + + eb->batch_start_offset; + + shadow_batch_start = gen8_canonical_addr(vma->node.start); + + err = intel_engine_cmd_parser(eb->gem_context, + eb->engine, eb->batch->obj, - pool->obj, + batch_start, eb->batch_start_offset, eb->batch_len, - is_master); + pool->obj, + shadow_batch_start); + if (err) { - if (err == -EACCES) /* unhandled chained batch */ + i915_vma_unpin(vma); + + /* + * Unsafe GGTT-backed buffers can still be submitted safely + * as non-secure. + * For PPGTT backing however, we have no choice but to forcibly + * reject unsafe buffers + */ + if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES)) + /* Execute original buffer non-secure */ vma = NULL; else vma = ERR_PTR(err); goto err; } - vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - goto err; - eb->vma[eb->buffer_count] = i915_vma_get(vma); eb->flags[eb->buffer_count] = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; vma->exec_flags = &eb->flags[eb->buffer_count]; eb->buffer_count++; + eb->batch_start_offset = 0; + eb->batch = vma; + + if (CMDPARSER_USES_GGTT(eb->i915)) + eb->batch_flags |= I915_DISPATCH_SECURE; + + /* eb->batch_len unchanged */ + vma->private = pool; return vma; @@ -2430,6 +2486,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct drm_i915_gem_exec_object2 *exec, struct drm_syncobj **fences) { + struct drm_i915_private *i915 = to_i915(dev); struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct dma_fence *exec_fence = NULL; @@ -2441,7 +2498,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); - eb.i915 = to_i915(dev); + eb.i915 = i915; eb.file = file; eb.args = args; if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) @@ -2461,8 +2518,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { + if (INTEL_GEN(i915) >= 11) + return -ENODEV; + + /* Return -EPERM to trigger fallback code on old binaries. */ + if (!HAS_SECURE_BATCHES(i915)) + return -EPERM; + if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) - return -EPERM; + return -EPERM; eb.batch_flags |= I915_DISPATCH_SECURE; } @@ -2539,34 +2603,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } + if (eb.batch_len == 0) + eb.batch_len = eb.batch->size - eb.batch_start_offset; + if (eb_use_cmdparser(&eb)) { struct i915_vma *vma; - vma = eb_parse(&eb, drm_is_current_master(file)); + vma = eb_parse(&eb); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_vma; } - - if (vma) { - /* - * Batch parsed and accepted: - * - * Set the DISPATCH_SECURE bit to remove the NON_SECURE - * bit from MI_BATCH_BUFFER_START commands issued in - * the dispatch_execbuffer implementations. We - * specifically don't want that set on batches the - * command parser has accepted. - */ - eb.batch_flags |= I915_DISPATCH_SECURE; - eb.batch_start_offset = 0; - eb.batch = vma; - } } - if (eb.batch_len == 0) - eb.batch_len = eb.batch->size - eb.batch_start_offset; - /* * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 1e045c337044..4c72d74d6576 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -646,8 +646,28 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, obj->mm.dirty = false; for_each_sgt_page(page, sgt_iter, pages) { - if (obj->mm.dirty) + if (obj->mm.dirty && trylock_page(page)) { + /* + * As this may not be anonymous memory (e.g. shmem) + * but exist on a real mapping, we have to lock + * the page in order to dirty it -- holding + * the page reference is not sufficient to + * prevent the inode from being truncated. + * Play safe and take the lock. + * + * However...! + * + * The mmu-notifier can be invalidated for a + * migrate_page, that is alreadying holding the lock + * on the page. Such a try_to_unmap() will result + * in us calling put_pages() and so recursively try + * to lock the page. We avoid that deadlock with + * a trylock_page() and in exchange we risk missing + * some page dirtying. + */ set_page_dirty(page); + unlock_page(page); + } mark_page_accessed(page); put_page(page); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index ee9d2bcd2c13..ef7bc41ffffa 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -310,10 +310,23 @@ int intel_context_prepare_remote_request(struct intel_context *ce, GEM_BUG_ON(rq->hw_context == ce); if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ - err = mutex_lock_interruptible_nested(&tl->mutex, - SINGLE_DEPTH_NESTING); - if (err) - return err; + /* + * Ideally, we just want to insert our foreign fence as + * a barrier into the remove context, such that this operation + * occurs after all current operations in that context, and + * all future operations must occur after this. + * + * Currently, the timeline->last_request tracking is guarded + * by its mutex and so we must obtain that to atomically + * insert our barrier. However, since we already hold our + * timeline->mutex, we must be careful against potential + * inversion if we are the kernel_context as the remote context + * will itself poke at the kernel_context when it needs to + * unpin. Ergo, if already locked, we drop both locks and + * try again (through the magic of userspace repeating EAGAIN). + */ + if (!mutex_trylock(&tl->mutex)) + return -EAGAIN; /* Queue this switch after current activity by this context. */ err = i915_active_fence_set(&tl->last_request, rq); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index bc3b72bfa9e3..01765a7ec18f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -100,9 +100,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists) static inline struct i915_request * execlists_active(const struct intel_engine_execlists *execlists) { - GEM_BUG_ON(execlists->active - execlists->inflight > - execlists_num_ports(execlists)); - return READ_ONCE(*execlists->active); + return *READ_ONCE(execlists->active); } static inline void diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f8113bc756c6..813bd3a610d2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -28,13 +28,13 @@ #include "i915_drv.h" -#include "gt/intel_gt.h" - +#include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" #include "intel_engine_pool.h" #include "intel_engine_user.h" -#include "intel_context.h" +#include "intel_gt.h" +#include "intel_gt_requests.h" #include "intel_lrc.h" #include "intel_reset.h" #include "intel_ring.h" @@ -616,6 +616,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_execlists(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); + intel_engine_init_retire(engine); intel_engine_pool_init(&engine->pool); @@ -838,6 +839,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) cleanup_status_page(engine); + intel_engine_fini_retire(engine); intel_engine_pool_fini(&engine->pool); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); @@ -1372,6 +1374,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, } execlists_active_lock_bh(execlists); + rcu_read_lock(); for (port = execlists->active; (rq = *port); port++) { char hdr[80]; int len; @@ -1409,6 +1412,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, if (tl) intel_timeline_put(tl); } + rcu_read_unlock(); execlists_active_unlock_bh(execlists); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 5051f304705b..06aa14c7aa8c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -141,8 +141,8 @@ void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine) void intel_engine_park_heartbeat(struct intel_engine_cs *engine) { - cancel_delayed_work(&engine->heartbeat.work); - i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); + if (cancel_delayed_work(&engine->heartbeat.work)) + i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); } void intel_engine_init_heartbeat(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 3c0f490ff2c7..0e1ad4a4bd97 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -73,8 +73,42 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ +static void +__queue_and_release_pm(struct i915_request *rq, + struct intel_timeline *tl, + struct intel_engine_cs *engine) +{ + struct intel_gt_timelines *timelines = &engine->gt->timelines; + + GEM_TRACE("%s\n", engine->name); + + /* + * We have to serialise all potential retirement paths with our + * submission, as we don't want to underflow either the + * engine->wakeref.counter or our timeline->active_count. + * + * Equally, we cannot allow a new submission to start until + * after we finish queueing, nor could we allow that submitter + * to retire us before we are ready! + */ + spin_lock(&timelines->lock); + + /* Let intel_gt_retire_requests() retire us (acquired under lock) */ + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); + + /* Hand the request over to HW and so engine_retire() */ + __i915_request_queue(rq, NULL); + + /* Let new submissions commence (and maybe retire this timeline) */ + __intel_wakeref_defer_park(&engine->wakeref); + + spin_unlock(&timelines->lock); +} + static bool switch_to_kernel_context(struct intel_engine_cs *engine) { + struct intel_context *ce = engine->kernel_context; struct i915_request *rq; unsigned long flags; bool result = true; @@ -98,16 +132,31 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * This should hold true as we can only park the engine after * retiring the last request, thus all rings should be empty and * all timelines idle. + * + * For unlocking, there are 2 other parties and the GPU who have a + * stake here. + * + * A new gpu user will be waiting on the engine-pm to start their + * engine_unpark. New waiters are predicated on engine->wakeref.count + * and so intel_wakeref_defer_park() acts like a mutex_unlock of the + * engine->wakeref. + * + * The other party is intel_gt_retire_requests(), which is walking the + * list of active timelines looking for completions. Meanwhile as soon + * as we call __i915_request_queue(), the GPU may complete our request. + * Ergo, if we put ourselves on the timelines.active_list + * (se intel_timeline_enter()) before we increment the + * engine->wakeref.count, we may see the request completion and retire + * it causing an undeflow of the engine->wakeref. */ - flags = __timeline_mark_lock(engine->kernel_context); + flags = __timeline_mark_lock(ce); + GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); - rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); + rq = __i915_request_create(ce, GFP_NOWAIT); if (IS_ERR(rq)) /* Context switch failed, hope for the best! Maybe reset? */ goto out_unlock; - intel_timeline_enter(i915_request_timeline(rq)); - /* Check again on the next retirement. */ engine->wakeref_serial = engine->serial + 1; i915_request_add_active_barriers(rq); @@ -116,13 +165,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) rq->sched.attr.priority = I915_PRIORITY_BARRIER; __i915_request_commit(rq); - /* Release our exclusive hold on the engine */ - __intel_wakeref_defer_park(&engine->wakeref); - __i915_request_queue(rq, NULL); + /* Expose ourselves to the world */ + __queue_and_release_pm(rq, ce->timeline, engine); result = false; out_unlock: - __timeline_mark_unlock(engine->kernel_context, flags); + __timeline_mark_unlock(ce, flags); return result; } @@ -177,7 +225,8 @@ static int __engine_park(struct intel_wakeref *wf) engine->execlists.no_priolist = false; - intel_gt_pm_put(engine->gt); + /* While gt calls i915_vma_parked(), we have to break the lock cycle */ + intel_gt_pm_put_async(engine->gt); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 739c50fefcef..24e20344dc22 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -31,6 +31,16 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine) intel_wakeref_put(&engine->wakeref); } +static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine) +{ + intel_wakeref_put_async(&engine->wakeref); +} + +static inline void intel_engine_pm_flush(struct intel_engine_cs *engine) +{ + intel_wakeref_unlock_wait(&engine->wakeref); +} + void intel_engine_init__pm(struct intel_engine_cs *engine); #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index c5d1047a4bc5..17f1f1441efc 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -451,16 +451,25 @@ struct intel_engine_cs { struct intel_engine_execlists execlists; + /* + * Keep track of completed timelines on this engine for early + * retirement with the goal of quickly enabling powersaving as + * soon as the engine is idle. + */ + struct intel_timeline *retire; + struct work_struct retire_work; + /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; -#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +#define I915_ENGINE_USING_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) #define I915_ENGINE_IS_VIRTUAL BIT(5) #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) unsigned int flags; /* @@ -528,9 +537,15 @@ struct intel_engine_cs { }; static inline bool -intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) +intel_engine_using_cmd_parser(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_USING_CMD_PARSER; +} + +static inline bool +intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine) { - return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; + return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER; } static inline bool diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 060a27d9af34..a459a42ad5c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -61,6 +61,9 @@ static int __gt_unpark(struct intel_wakeref *wf) gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); GEM_BUG_ON(!gt->awake); + if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); + intel_rps_unpark(>->rps); i915_pmu_gt_unparked(i915); @@ -86,6 +89,11 @@ static int __gt_park(struct intel_wakeref *wf) /* Everything switched off, flush any residual interrupt just in case */ intel_synchronize_irq(i915); + if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) { + intel_rc6_ctx_wa_check(&i915->gt.rc6); + intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + } + GEM_BUG_ON(!wakeref); intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); @@ -97,7 +105,6 @@ static int __gt_park(struct intel_wakeref *wf) static const struct intel_wakeref_ops wf_ops = { .get = __gt_unpark, .put = __gt_park, - .flags = INTEL_WAKEREF_PUT_ASYNC, }; void intel_gt_pm_init_early(struct intel_gt *gt) @@ -264,7 +271,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt) static suspend_state_t pm_suspend_target(void) { -#if IS_ENABLED(CONFIG_PM_SLEEP) +#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP) return pm_suspend_target_state; #else return PM_SUSPEND_TO_IDLE; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index b3e17399be9b..990efc27a4e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -32,6 +32,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt) intel_wakeref_put(>->wakeref); } +static inline void intel_gt_pm_put_async(struct intel_gt *gt) +{ + intel_wakeref_put_async(>->wakeref); +} + static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { return intel_wakeref_wait_for_idle(>->wakeref); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index b73229a84d85..3dc13ecf41bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -4,6 +4,8 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/workqueue.h> + #include "i915_drv.h" /* for_each_engine() */ #include "i915_request.h" #include "intel_gt.h" @@ -29,6 +31,79 @@ static void flush_submission(struct intel_gt *gt) intel_engine_flush_submission(engine); } +static void engine_retire(struct work_struct *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), retire_work); + struct intel_timeline *tl = xchg(&engine->retire, NULL); + + do { + struct intel_timeline *next = xchg(&tl->retire, NULL); + + /* + * Our goal here is to retire _idle_ timelines as soon as + * possible (as they are idle, we do not expect userspace + * to be cleaning up anytime soon). + * + * If the timeline is currently locked, either it is being + * retired elsewhere or about to be! + */ + if (mutex_trylock(&tl->mutex)) { + retire_requests(tl); + mutex_unlock(&tl->mutex); + } + intel_timeline_put(tl); + + GEM_BUG_ON(!next); + tl = ptr_mask_bits(next, 1); + } while (tl); +} + +static bool add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ + struct intel_timeline *first; + + /* + * We open-code a llist here to include the additional tag [BIT(0)] + * so that we know when the timeline is already on a + * retirement queue: either this engine or another. + * + * However, we rely on that a timeline can only be active on a single + * engine at any one time and that add_retire() is called before the + * engine releases the timeline and transferred to another to retire. + */ + + if (READ_ONCE(tl->retire)) /* already queued */ + return false; + + intel_timeline_get(tl); + first = READ_ONCE(engine->retire); + do + tl->retire = ptr_pack_bits(first, 1, 1); + while (!try_cmpxchg(&engine->retire, &first, tl)); + + return !first; +} + +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ + if (add_retire(engine, tl)) + schedule_work(&engine->retire_work); +} + +void intel_engine_init_retire(struct intel_engine_cs *engine) +{ + INIT_WORK(&engine->retire_work, engine_retire); +} + +void intel_engine_fini_retire(struct intel_engine_cs *engine) +{ + flush_work(&engine->retire_work); + GEM_BUG_ON(engine->retire); +} + long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) { struct intel_gt_timelines *timelines = >->timelines; @@ -52,8 +127,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) } intel_timeline_get(tl); - GEM_BUG_ON(!tl->active_count); - tl->active_count++; /* pin the list element */ + GEM_BUG_ON(!atomic_read(&tl->active_count)); + atomic_inc(&tl->active_count); /* pin the list element */ spin_unlock_irqrestore(&timelines->lock, flags); if (timeout > 0) { @@ -74,16 +149,16 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) /* Resume iteration after dropping lock */ list_safe_reset_next(tl, tn, link); - if (--tl->active_count) - active_count += !!rcu_access_pointer(tl->last_request.fence); - else + if (atomic_dec_and_test(&tl->active_count)) list_del(&tl->link); + else + active_count += !!rcu_access_pointer(tl->last_request.fence); mutex_unlock(&tl->mutex); /* Defer the final release to after the spinlock */ if (refcount_dec_and_test(&tl->kref.refcount)) { - GEM_BUG_ON(tl->active_count); + GEM_BUG_ON(atomic_read(&tl->active_count)); list_add(&tl->link, &free); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h index bd31cbce47e0..d626fb115386 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -7,7 +7,9 @@ #ifndef INTEL_GT_REQUESTS_H #define INTEL_GT_REQUESTS_H +struct intel_engine_cs; struct intel_gt; +struct intel_timeline; long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); static inline void intel_gt_retire_requests(struct intel_gt *gt) @@ -15,6 +17,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt) intel_gt_retire_requests_timeout(gt, 0); } +void intel_engine_init_retire(struct intel_engine_cs *engine); +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl); +void intel_engine_fini_retire(struct intel_engine_cs *engine); + int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); void intel_gt_init_requests(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 51aef2a233cb..9fdefbdc3546 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -142,6 +142,7 @@ #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" @@ -990,6 +991,59 @@ static void intel_engine_context_out(struct intel_engine_cs *engine) write_sequnlock_irqrestore(&engine->stats.lock, flags); } +static void restore_default_state(struct intel_context *ce, + struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + + if (engine->pinned_default_state) + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + + execlists_init_reg_state(regs, ce, engine, ce->ring, false); +} + +static void reset_active(struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct intel_context * const ce = rq->hw_context; + u32 head; + + /* + * The executing context has been cancelled. We want to prevent + * further execution along this context and propagate the error on + * to anything depending on its results. + * + * In __i915_request_submit(), we apply the -EIO and remove the + * requests' payloads for any banned requests. But first, we must + * rewind the context back to the start of the incomplete request so + * that we do not jump back into the middle of the batch. + * + * We preserve the breadcrumbs and semaphores of the incomplete + * requests so that inter-timeline dependencies (i.e other timelines) + * remain correctly ordered. And we defer to __i915_request_submit() + * so that all asynchronous waits are correctly handled. + */ + GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", + __func__, engine->name, rq->fence.context, rq->fence.seqno); + + /* On resubmission of the active request, payload will be scrubbed */ + if (i915_request_completed(rq)) + head = rq->tail; + else + head = active_request(ce->timeline, rq)->head; + ce->ring->head = intel_ring_wrap(ce->ring, head); + intel_ring_update_space(ce->ring); + + /* Scrub the context image to prevent replaying the previous batch */ + restore_default_state(ce, engine); + __execlists_update_reg_state(ce, engine); + + /* We've switched away, so this should be a no-op, but intent matters */ + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; +} + static inline struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { @@ -998,6 +1052,9 @@ __execlists_schedule_in(struct i915_request *rq) intel_context_get(ce); + if (unlikely(i915_gem_context_is_banned(ce->gem_context))) + reset_active(rq, engine); + if (ce->tag) { /* Use a fixed tag for OA and friends */ ce->lrc_desc |= (u64)ce->tag << 32; @@ -1047,71 +1104,29 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) tasklet_schedule(&ve->base.execlists.tasklet); } -static void restore_default_state(struct intel_context *ce, - struct intel_engine_cs *engine) -{ - u32 *regs = ce->lrc_reg_state; - - if (engine->pinned_default_state) - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - - execlists_init_reg_state(regs, ce, engine, ce->ring, false); -} - -static void reset_active(struct i915_request *rq, - struct intel_engine_cs *engine) +static inline void +__execlists_schedule_out(struct i915_request *rq, + struct intel_engine_cs * const engine) { struct intel_context * const ce = rq->hw_context; - u32 head; /* - * The executing context has been cancelled. We want to prevent - * further execution along this context and propagate the error on - * to anything depending on its results. - * - * In __i915_request_submit(), we apply the -EIO and remove the - * requests' payloads for any banned requests. But first, we must - * rewind the context back to the start of the incomplete request so - * that we do not jump back into the middle of the batch. - * - * We preserve the breadcrumbs and semaphores of the incomplete - * requests so that inter-timeline dependencies (i.e other timelines) - * remain correctly ordered. And we defer to __i915_request_submit() - * so that all asynchronous waits are correctly handled. + * NB process_csb() is not under the engine->active.lock and hence + * schedule_out can race with schedule_in meaning that we should + * refrain from doing non-trivial work here. */ - GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", - __func__, engine->name, rq->fence.context, rq->fence.seqno); - /* On resubmission of the active request, payload will be scrubbed */ - if (i915_request_completed(rq)) - head = rq->tail; - else - head = active_request(ce->timeline, rq)->head; - ce->ring->head = intel_ring_wrap(ce->ring, head); - intel_ring_update_space(ce->ring); - - /* Scrub the context image to prevent replaying the previous batch */ - restore_default_state(ce, engine); - __execlists_update_reg_state(ce, engine); - - /* We've switched away, so this should be a no-op, but intent matters */ - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; -} - -static inline void -__execlists_schedule_out(struct i915_request *rq, - struct intel_engine_cs * const engine) -{ - struct intel_context * const ce = rq->hw_context; + /* + * If we have just completed this context, the engine may now be + * idle and we want to re-enter powersaving. + */ + if (list_is_last(&rq->link, &ce->timeline->requests) && + i915_request_completed(rq)) + intel_engine_add_retire(engine, ce->timeline); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); - intel_gt_pm_put(engine->gt); - - if (unlikely(i915_gem_context_is_banned(ce->gem_context))) - reset_active(rq, engine); + intel_gt_pm_put_async(engine->gt); /* * If this is part of a virtual engine, its next request may @@ -1931,16 +1946,17 @@ skip_submit: static void cancel_port_requests(struct intel_engine_execlists * const execlists) { - struct i915_request * const *port, *rq; + struct i915_request * const *port; - for (port = execlists->pending; (rq = *port); port++) - execlists_schedule_out(rq); + for (port = execlists->pending; *port; port++) + execlists_schedule_out(*port); memset(execlists->pending, 0, sizeof(execlists->pending)); - for (port = execlists->active; (rq = *port); port++) - execlists_schedule_out(rq); - execlists->active = - memset(execlists->inflight, 0, sizeof(execlists->inflight)); + /* Mark the end of active before we overwrite *active */ + for (port = xchg(&execlists->active, execlists->pending); *port; port++) + execlists_schedule_out(*port); + WRITE_ONCE(execlists->active, + memset(execlists->inflight, 0, sizeof(execlists->inflight))); } static inline void @@ -2093,23 +2109,27 @@ static void process_csb(struct intel_engine_cs *engine) else promote = gen8_csb_parse(execlists, buf + 2 * head); if (promote) { + struct i915_request * const *old = execlists->active; + + /* Point active to the new ELSP; prevent overwriting */ + WRITE_ONCE(execlists->active, execlists->pending); + set_timeslice(engine); + if (!inject_preempt_hang(execlists)) ring_set_paused(engine, 0); /* cancel old inflight, prepare for switch */ - trace_ports(execlists, "preempted", execlists->active); - while (*execlists->active) - execlists_schedule_out(*execlists->active++); + trace_ports(execlists, "preempted", old); + while (*old) + execlists_schedule_out(*old++); /* switch pending to inflight */ GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); - execlists->active = - memcpy(execlists->inflight, - execlists->pending, - execlists_num_ports(execlists) * - sizeof(*execlists->pending)); - - set_timeslice(engine); + WRITE_ONCE(execlists->active, + memcpy(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists) * + sizeof(*execlists->pending))); WRITE_ONCE(execlists->pending[0], NULL); } else { diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 5ad4a92a9582..700104b90163 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -178,8 +178,13 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | rc6_mode); - set(uncore, GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); + /* + * WaRsDisableCoarsePowerGating:skl,cnl + * - Render/Media PG need to be disabled with RC6. + */ + if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6))) + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); } static void gen8_rc6_enable(struct intel_rc6 *rc6) @@ -486,6 +491,66 @@ static void rpm_put(struct intel_rc6 *rc6) rc6->wakeref = false; } +static bool intel_rc6_ctx_corrupted(struct intel_rc6 *rc6) +{ + return !intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO); +} + +static void intel_rc6_ctx_wa_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return; + + if (intel_rc6_ctx_corrupted(rc6)) { + DRM_INFO("RC6 context corrupted, disabling runtime power management\n"); + rc6->ctx_corrupted = true; + } +} + +/** + * intel_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA + * @rc6: rc6 state + * + * Perform any steps needed to re-init the RC6 CTX WA after system resume. + */ +void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6) +{ + if (rc6->ctx_corrupted && !intel_rc6_ctx_corrupted(rc6)) { + DRM_INFO("RC6 context restored, re-enabling runtime power management\n"); + rc6->ctx_corrupted = false; + } +} + +/** + * intel_rc6_ctx_wa_check - check for a new RC6 CTX corruption + * @rc6: rc6 state + * + * Check if an RC6 CTX corruption has happened since the last check and if so + * disable RC6 and runtime power management. +*/ +void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return; + + if (rc6->ctx_corrupted) + return; + + if (!intel_rc6_ctx_corrupted(rc6)) + return; + + DRM_NOTE("RC6 context corruption, disabling runtime power management\n"); + + intel_rc6_disable(rc6); + rc6->ctx_corrupted = true; + + return; +} + static void __intel_rc6_disable(struct intel_rc6 *rc6) { struct drm_i915_private *i915 = rc6_to_i915(rc6); @@ -510,6 +575,8 @@ void intel_rc6_init(struct intel_rc6 *rc6) if (!rc6_supported(rc6)) return; + intel_rc6_ctx_wa_init(rc6); + if (IS_CHERRYVIEW(i915)) err = chv_rc6_init(rc6); else if (IS_VALLEYVIEW(i915)) @@ -544,6 +611,9 @@ void intel_rc6_enable(struct intel_rc6 *rc6) GEM_BUG_ON(rc6->enabled); + if (rc6->ctx_corrupted) + return; + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); if (IS_CHERRYVIEW(i915)) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h index 5e6711f36457..1370f6834a4c 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h @@ -22,4 +22,7 @@ void intel_rc6_disable(struct intel_rc6 *rc6); u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg); u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg); +void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6); +void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6); + #endif /* INTEL_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h index 214f354d6ae4..89ad5697a8d4 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h @@ -23,6 +23,7 @@ struct intel_rc6 { bool supported : 1; bool enabled : 1; bool wakeref : 1; + bool ctx_corrupted : 1; }; #endif /* INTEL_RC6_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index f03e000051c1..c97423a76642 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1114,7 +1114,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) out: intel_engine_cancel_stop_cs(engine); reset_finish_engine(engine); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); return ret; } diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index ece20504d240..374b28f13ca0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -57,9 +57,10 @@ int intel_ring_pin(struct intel_ring *ring) i915_vma_make_unshrinkable(vma); - GEM_BUG_ON(ring->vaddr); - ring->vaddr = addr; + /* Discard any unused bytes beyond that submitted to hw. */ + intel_ring_reset(ring, ring->emit); + ring->vaddr = addr; return 0; err_ring: @@ -85,20 +86,14 @@ void intel_ring_unpin(struct intel_ring *ring) if (!atomic_dec_and_test(&ring->pin_count)) return; - /* Discard any unused bytes beyond that submitted to hw. */ - intel_ring_reset(ring, ring->emit); - i915_vma_unset_ggtt_write(vma); if (i915_vma_is_map_and_fenceable(vma)) i915_vma_unpin_iomap(vma); else i915_gem_object_unpin_map(vma->obj); - GEM_BUG_ON(!ring->vaddr); - ring->vaddr = NULL; - - i915_vma_unpin(vma); i915_vma_make_purgeable(vma); + i915_vma_unpin(vma); } static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 14ad10acd548..649798c184fb 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -282,6 +282,7 @@ void intel_timeline_fini(struct intel_timeline *timeline) { GEM_BUG_ON(atomic_read(&timeline->pin_count)); GEM_BUG_ON(!list_empty(&timeline->requests)); + GEM_BUG_ON(timeline->retire); if (timeline->hwsp_cacheline) cacheline_free(timeline->hwsp_cacheline); @@ -339,15 +340,33 @@ void intel_timeline_enter(struct intel_timeline *tl) struct intel_gt_timelines *timelines = &tl->gt->timelines; unsigned long flags; + /* + * Pretend we are serialised by the timeline->mutex. + * + * While generally true, there are a few exceptions to the rule + * for the engine->kernel_context being used to manage power + * transitions. As the engine_park may be called from under any + * timeline, it uses the power mutex as a global serialisation + * lock to prevent any other request entering its timeline. + * + * The rule is generally tl->mutex, otherwise engine->wakeref.mutex. + * + * However, intel_gt_retire_request() does not know which engine + * it is retiring along and so cannot partake in the engine-pm + * barrier, and there we use the tl->active_count as a means to + * pin the timeline in the active_list while the locks are dropped. + * Ergo, as that is outside of the engine-pm barrier, we need to + * use atomic to manipulate tl->active_count. + */ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!atomic_read(&tl->pin_count)); - if (tl->active_count++) + + if (atomic_add_unless(&tl->active_count, 1, 0)) return; - GEM_BUG_ON(!tl->active_count); /* overflow? */ spin_lock_irqsave(&timelines->lock, flags); - list_add(&tl->link, &timelines->active_list); + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); spin_unlock_irqrestore(&timelines->lock, flags); } @@ -356,14 +375,16 @@ void intel_timeline_exit(struct intel_timeline *tl) struct intel_gt_timelines *timelines = &tl->gt->timelines; unsigned long flags; + /* See intel_timeline_enter() */ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!tl->active_count); - if (--tl->active_count) + GEM_BUG_ON(!atomic_read(&tl->active_count)); + if (atomic_add_unless(&tl->active_count, -1, 1)) return; spin_lock_irqsave(&timelines->lock, flags); - list_del(&tl->link); + if (atomic_dec_and_test(&tl->active_count)) + list_del(&tl->link); spin_unlock_irqrestore(&timelines->lock, flags); /* diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 98d9ee166379..aaf15cbe1ce1 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -42,7 +42,7 @@ struct intel_timeline { * from the intel_context caller plus internal atomicity. */ atomic_t pin_count; - unsigned int active_count; + atomic_t active_count; const u32 *hwsp_seqno; struct i915_vma *hwsp_ggtt; @@ -66,6 +66,9 @@ struct intel_timeline { */ struct i915_active_fence last_request; + /** A chain of completed timelines ready for early retirement. */ + struct intel_timeline *retire; + /** * We track the most recent seqno that we wait on in every context so * that we only have to emit a new await and dependency on a more diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 20b9c83f43ad..cbf6b0735272 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -51,11 +51,12 @@ static int live_engine_pm(void *arg) pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n", engine->name, p->name); else - intel_engine_pm_put(engine); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); + intel_engine_pm_put_async(engine); p->critical_section_end(); - /* engine wakeref is sync (instant) */ + intel_engine_pm_flush(engine); + if (intel_engine_pm_is_awake(engine)) { pr_err("%s is still awake after flushing pm\n", engine->name); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 019ae6486e8d..3ee4a4e7689d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -554,6 +554,13 @@ int intel_guc_suspend(struct intel_guc *guc) }; /* + * If GuC communication is enabled but submission is not supported, + * we do not need to suspend the GuC. + */ + if (!intel_guc_submission_is_enabled(guc)) + return 0; + + /* * The ENTER_S_STATE action queues the save/restore operation in GuC FW * and then returns, so waiting on the H2G is not enough to guarantee * GuC is done. When all the processing is done, GuC writes @@ -610,6 +617,14 @@ int intel_guc_resume(struct intel_guc *guc) GUC_POWER_D0, }; + /* + * If GuC communication is enabled but submission is not supported, + * we do not need to resume the GuC but we do need to enable the + * GuC communication on resume (above). + */ + if (!intel_guc_submission_is_enabled(guc)) + return 0; + return intel_guc_send(guc, action, ARRAY_SIZE(action)); } diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index afd7f66bdc2d..bd12af349123 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -3420,6 +3420,10 @@ int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt, } for (i = 0; i < gvt->mmio.num_mmio_block; i++, block++) { + /* pvinfo data doesn't come from hw mmio */ + if (i915_mmio_reg_offset(block->offset) == VGT_PVINFO_PAGE) + continue; + for (j = 0; j < block->size; j += 4) { ret = handler(gvt, i915_mmio_reg_offset(block->offset) + j, diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 207383dda84d..dca15ace88f6 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -91,14 +91,15 @@ static void debug_active_init(struct i915_active *ref) static void debug_active_activate(struct i915_active *ref) { - lockdep_assert_held(&ref->mutex); + spin_lock_irq(&ref->tree_lock); if (!atomic_read(&ref->count)) /* before the first inc */ debug_object_activate(ref, &active_debug_desc); + spin_unlock_irq(&ref->tree_lock); } static void debug_active_deactivate(struct i915_active *ref) { - lockdep_assert_held(&ref->mutex); + lockdep_assert_held(&ref->tree_lock); if (!atomic_read(&ref->count)) /* after the last dec */ debug_object_deactivate(ref, &active_debug_desc); } @@ -128,29 +129,22 @@ __active_retire(struct i915_active *ref) { struct active_node *it, *n; struct rb_root root; - bool retire = false; + unsigned long flags; - lockdep_assert_held(&ref->mutex); GEM_BUG_ON(i915_active_is_idle(ref)); /* return the unused nodes to our slabcache -- flushing the allocator */ - if (atomic_dec_and_test(&ref->count)) { - debug_active_deactivate(ref); - root = ref->tree; - ref->tree = RB_ROOT; - ref->cache = NULL; - retire = true; - } - - mutex_unlock(&ref->mutex); - if (!retire) + if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags)) return; GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); - rbtree_postorder_for_each_entry_safe(it, n, &root, node) { - GEM_BUG_ON(i915_active_fence_isset(&it->base)); - kmem_cache_free(global.slab_cache, it); - } + debug_active_deactivate(ref); + + root = ref->tree; + ref->tree = RB_ROOT; + ref->cache = NULL; + + spin_unlock_irqrestore(&ref->tree_lock, flags); /* After the final retire, the entire struct may be freed */ if (ref->retire) @@ -158,6 +152,11 @@ __active_retire(struct i915_active *ref) /* ... except if you wait on it, you must manage your own references! */ wake_up_var(ref); + + rbtree_postorder_for_each_entry_safe(it, n, &root, node) { + GEM_BUG_ON(i915_active_fence_isset(&it->base)); + kmem_cache_free(global.slab_cache, it); + } } static void @@ -169,7 +168,6 @@ active_work(struct work_struct *wrk) if (atomic_add_unless(&ref->count, -1, 1)) return; - mutex_lock(&ref->mutex); __active_retire(ref); } @@ -180,9 +178,7 @@ active_retire(struct i915_active *ref) if (atomic_add_unless(&ref->count, -1, 1)) return; - /* If we are inside interrupt context (fence signaling), defer */ - if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS || - !mutex_trylock(&ref->mutex)) { + if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) { queue_work(system_unbound_wq, &ref->work); return; } @@ -227,7 +223,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) if (!prealloc) return NULL; - mutex_lock(&ref->mutex); + spin_lock_irq(&ref->tree_lock); GEM_BUG_ON(i915_active_is_idle(ref)); parent = NULL; @@ -257,7 +253,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) out: ref->cache = node; - mutex_unlock(&ref->mutex); + spin_unlock_irq(&ref->tree_lock); BUILD_BUG_ON(offsetof(typeof(*node), base)); return &node->base; @@ -278,8 +274,10 @@ void __i915_active_init(struct i915_active *ref, if (bits & I915_ACTIVE_MAY_SLEEP) ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; + spin_lock_init(&ref->tree_lock); ref->tree = RB_ROOT; ref->cache = NULL; + init_llist_head(&ref->preallocated_barriers); atomic_set(&ref->count, 0); __mutex_init(&ref->mutex, "i915_active", key); @@ -510,7 +508,7 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) if (RB_EMPTY_ROOT(&ref->tree)) return NULL; - mutex_lock(&ref->mutex); + spin_lock_irq(&ref->tree_lock); GEM_BUG_ON(i915_active_is_idle(ref)); /* @@ -575,7 +573,7 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) goto match; } - mutex_unlock(&ref->mutex); + spin_unlock_irq(&ref->tree_lock); return NULL; @@ -583,7 +581,7 @@ match: rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ if (p == &ref->cache->node) ref->cache = NULL; - mutex_unlock(&ref->mutex); + spin_unlock_irq(&ref->tree_lock); return rb_entry(p, struct active_node, node); } @@ -664,6 +662,7 @@ unwind: void i915_active_acquire_barrier(struct i915_active *ref) { struct llist_node *pos, *next; + unsigned long flags; GEM_BUG_ON(i915_active_is_idle(ref)); @@ -673,12 +672,13 @@ void i915_active_acquire_barrier(struct i915_active *ref) * populated by i915_request_add_active_barriers() to point to the * request that will eventually release them. */ - mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { struct active_node *node = barrier_from_ll(pos); struct intel_engine_cs *engine = barrier_to_engine(node); struct rb_node **p, *parent; + spin_lock_irqsave_nested(&ref->tree_lock, flags, + SINGLE_DEPTH_NESTING); parent = NULL; p = &ref->tree.rb_node; while (*p) { @@ -694,12 +694,12 @@ void i915_active_acquire_barrier(struct i915_active *ref) } rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &ref->tree); + spin_unlock_irqrestore(&ref->tree_lock, flags); GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); llist_add(barrier_to_ll(node), &engine->barrier_tasks); intel_engine_pm_put(engine); } - mutex_unlock(&ref->mutex); } void i915_request_add_active_barriers(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index d89a74c142c6..96aed0ee700a 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -48,6 +48,7 @@ struct i915_active { atomic_t count; struct mutex mutex; + spinlock_t tree_lock; struct active_node *cache; struct rb_root tree; diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 24555102e198..f24096e27bef 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -53,13 +53,11 @@ * granting userspace undue privileges. There are three categories of privilege. * * First, commands which are explicitly defined as privileged or which should - * only be used by the kernel driver. The parser generally rejects such - * commands, though it may allow some from the drm master process. + * only be used by the kernel driver. The parser rejects such commands * * Second, commands which access registers. To support correct/enhanced * userspace functionality, particularly certain OpenGL extensions, the parser - * provides a whitelist of registers which userspace may safely access (for both - * normal and drm master processes). + * provides a whitelist of registers which userspace may safely access * * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). * The parser always rejects such commands. @@ -84,9 +82,9 @@ * in the per-engine command tables. * * Other command table entries map fairly directly to high level categories - * mentioned above: rejected, master-only, register whitelist. The parser - * implements a number of checks, including the privileged memory checks, via a - * general bitmasking mechanism. + * mentioned above: rejected, register whitelist. The parser implements a number + * of checks, including the privileged memory checks, via a general bitmasking + * mechanism. */ /* @@ -104,8 +102,6 @@ struct drm_i915_cmd_descriptor { * CMD_DESC_REJECT: The command is never allowed * CMD_DESC_REGISTER: The command should be checked against the * register whitelist for the appropriate ring - * CMD_DESC_MASTER: The command is allowed if the submitting process - * is the DRM master */ u32 flags; #define CMD_DESC_FIXED (1<<0) @@ -113,7 +109,6 @@ struct drm_i915_cmd_descriptor { #define CMD_DESC_REJECT (1<<2) #define CMD_DESC_REGISTER (1<<3) #define CMD_DESC_BITMASK (1<<4) -#define CMD_DESC_MASTER (1<<5) /* * The command's unique identification bits and the bitmask to get them. @@ -194,7 +189,7 @@ struct drm_i915_cmd_table { #define CMD(op, opm, f, lm, fl, ...) \ { \ .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ - .cmd = { (op), ~0u << (opm) }, \ + .cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \ .length = { (lm) }, \ __VA_ARGS__ \ } @@ -209,14 +204,13 @@ struct drm_i915_cmd_table { #define R CMD_DESC_REJECT #define W CMD_DESC_REGISTER #define B CMD_DESC_BITMASK -#define M CMD_DESC_MASTER /* Command Mask Fixed Len Action ---------------------------------------------------------- */ -static const struct drm_i915_cmd_descriptor common_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = { CMD( MI_NOOP, SMI, F, 1, S ), CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), - CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ), CMD( MI_ARB_CHECK, SMI, F, 1, S ), CMD( MI_REPORT_HEAD, SMI, F, 1, S ), CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), @@ -246,7 +240,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ), }; -static const struct drm_i915_cmd_descriptor render_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = { CMD( MI_FLUSH, SMI, F, 1, S ), CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_PREDICATE, SMI, F, 1, S ), @@ -313,7 +307,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_RS_CONTEXT, SMI, F, 1, S ), - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), @@ -330,7 +324,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ), }; -static const struct drm_i915_cmd_descriptor video_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -374,7 +368,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { CMD( MFX_WAIT, SMFX, F, 1, S ), }; -static const struct drm_i915_cmd_descriptor vecs_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -412,7 +406,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = { }}, ), }; -static const struct drm_i915_cmd_descriptor blt_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = { CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B, .bits = {{ @@ -446,10 +440,64 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { }; static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), }; +/* + * For Gen9 we can still rely on the h/w to enforce cmd security, and only + * need to re-enforce the register access checks. We therefore only need to + * teach the cmdparser how to find the end of each command, and identify + * register accesses. The table doesn't need to reject any commands, and so + * the only commands listed here are: + * 1) Those that touch registers + * 2) Those that do not have the default 8-bit length + * + * Note that the default MI length mask chosen for this table is 0xFF, not + * the 0x3F used on older devices. This is because the vast majority of MI + * cmds on Gen9 use a standard 8-bit Length field. + * All the Gen9 blitter instructions are standard 0xFF length mask, and + * none allow access to non-general registers, so in fact no BLT cmds are + * included in the table at all. + * + */ +static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = { + CMD( MI_NOOP, SMI, F, 1, S ), + CMD( MI_USER_INTERRUPT, SMI, F, 1, S ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ), + CMD( MI_FLUSH, SMI, F, 1, S ), + CMD( MI_ARB_CHECK, SMI, F, 1, S ), + CMD( MI_REPORT_HEAD, SMI, F, 1, S ), + CMD( MI_ARB_ON_OFF, SMI, F, 1, S ), + CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), + CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), + CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ), + CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), + + /* + * We allow BB_START but apply further checks. We just sanitize the + * basic fields here. + */ +#define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0) +#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1) + CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B, + .bits = {{ + .offset = 0, + .mask = MI_BB_START_OPERAND_MASK, + .expected = MI_BB_START_OPERAND_EXPECT, + }}, ), +}; + static const struct drm_i915_cmd_descriptor noop_desc = CMD(MI_NOOP, SMI, F, 1, S); @@ -463,40 +511,44 @@ static const struct drm_i915_cmd_descriptor noop_desc = #undef R #undef W #undef B -#undef M -static const struct drm_i915_cmd_table gen7_render_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table gen7_render_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, }; -static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, { hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) }, }; -static const struct drm_i915_cmd_table gen7_video_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { video_cmds, ARRAY_SIZE(video_cmds) }, +static const struct drm_i915_cmd_table gen7_video_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) }, }; -static const struct drm_i915_cmd_table hsw_vebox_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { vecs_cmds, ARRAY_SIZE(vecs_cmds) }, +static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) }, }; -static const struct drm_i915_cmd_table gen7_blt_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, }; -static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, }; +static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = { + { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) }, +}; + + /* * Register whitelists, sorted by increasing register offset. */ @@ -612,17 +664,27 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), }; -static const struct drm_i915_reg_descriptor ivb_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)), -}; - -static const struct drm_i915_reg_descriptor hsw_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), +static const struct drm_i915_reg_descriptor gen9_blt_regs[] = { + REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE), + REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), + REG32(BCS_SWCTRL), + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + REG64_IDX(BCS_GPR, 0), + REG64_IDX(BCS_GPR, 1), + REG64_IDX(BCS_GPR, 2), + REG64_IDX(BCS_GPR, 3), + REG64_IDX(BCS_GPR, 4), + REG64_IDX(BCS_GPR, 5), + REG64_IDX(BCS_GPR, 6), + REG64_IDX(BCS_GPR, 7), + REG64_IDX(BCS_GPR, 8), + REG64_IDX(BCS_GPR, 9), + REG64_IDX(BCS_GPR, 10), + REG64_IDX(BCS_GPR, 11), + REG64_IDX(BCS_GPR, 12), + REG64_IDX(BCS_GPR, 13), + REG64_IDX(BCS_GPR, 14), + REG64_IDX(BCS_GPR, 15), }; #undef REG64 @@ -631,28 +693,27 @@ static const struct drm_i915_reg_descriptor hsw_master_regs[] = { struct drm_i915_reg_table { const struct drm_i915_reg_descriptor *regs; int num_regs; - bool master; }; static const struct drm_i915_reg_table ivb_render_reg_tables[] = { - { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, - { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, + { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, }; static const struct drm_i915_reg_table ivb_blt_reg_tables[] = { - { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, - { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, }; static const struct drm_i915_reg_table hsw_render_reg_tables[] = { - { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, - { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false }, - { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, + { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, + { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) }, }; static const struct drm_i915_reg_table hsw_blt_reg_tables[] = { - { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, - { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, +}; + +static const struct drm_i915_reg_table gen9_blt_reg_tables[] = { + { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) }, }; static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) @@ -710,6 +771,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } +static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = cmd_header >> INSTR_CLIENT_SHIFT; + + if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT) + return 0xFF; + + DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header); + return 0; +} + static bool validate_cmds_sorted(const struct intel_engine_cs *engine, const struct drm_i915_cmd_table *cmd_tables, int cmd_table_count) @@ -867,18 +939,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) int cmd_table_count; int ret; - if (!IS_GEN(engine->i915, 7)) + if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) && + engine->class == COPY_ENGINE_CLASS)) return; switch (engine->class) { case RENDER_CLASS: if (IS_HASWELL(engine->i915)) { - cmd_tables = hsw_render_ring_cmds; + cmd_tables = hsw_render_ring_cmd_table; cmd_table_count = - ARRAY_SIZE(hsw_render_ring_cmds); + ARRAY_SIZE(hsw_render_ring_cmd_table); } else { - cmd_tables = gen7_render_cmds; - cmd_table_count = ARRAY_SIZE(gen7_render_cmds); + cmd_tables = gen7_render_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table); } if (IS_HASWELL(engine->i915)) { @@ -888,36 +961,46 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) engine->reg_tables = ivb_render_reg_tables; engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables); } - engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; case VIDEO_DECODE_CLASS: - cmd_tables = gen7_video_cmds; - cmd_table_count = ARRAY_SIZE(gen7_video_cmds); + cmd_tables = gen7_video_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table); engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; case COPY_ENGINE_CLASS: - if (IS_HASWELL(engine->i915)) { - cmd_tables = hsw_blt_ring_cmds; - cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); + engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; + if (IS_GEN(engine->i915, 9)) { + cmd_tables = gen9_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table); + engine->get_cmd_length_mask = + gen9_blt_get_cmd_length_mask; + + /* BCS Engine unsafe without parser */ + engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER; + } else if (IS_HASWELL(engine->i915)) { + cmd_tables = hsw_blt_ring_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table); } else { - cmd_tables = gen7_blt_cmds; - cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); + cmd_tables = gen7_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table); } - if (IS_HASWELL(engine->i915)) { + if (IS_GEN(engine->i915, 9)) { + engine->reg_tables = gen9_blt_reg_tables; + engine->reg_table_count = + ARRAY_SIZE(gen9_blt_reg_tables); + } else if (IS_HASWELL(engine->i915)) { engine->reg_tables = hsw_blt_reg_tables; engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables); } else { engine->reg_tables = ivb_blt_reg_tables; engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables); } - - engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; case VIDEO_ENHANCEMENT_CLASS: - cmd_tables = hsw_vebox_cmds; - cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); + cmd_tables = hsw_vebox_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table); /* VECS can use the same length_mask function as VCS */ engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; @@ -943,7 +1026,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) return; } - engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER; + engine->flags |= I915_ENGINE_USING_CMD_PARSER; } /** @@ -955,7 +1038,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) */ void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) { - if (!intel_engine_needs_cmd_parser(engine)) + if (!intel_engine_using_cmd_parser(engine)) return; fini_hash_table(engine); @@ -1029,22 +1112,16 @@ __find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr) } static const struct drm_i915_reg_descriptor * -find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) +find_reg(const struct intel_engine_cs *engine, u32 addr) { const struct drm_i915_reg_table *table = engine->reg_tables; + const struct drm_i915_reg_descriptor *reg = NULL; int count = engine->reg_table_count; - for (; count > 0; ++table, --count) { - if (!table->master || is_master) { - const struct drm_i915_reg_descriptor *reg; + for (; !reg && (count > 0); ++table, --count) + reg = __find_reg(table->regs, table->num_regs, addr); - reg = __find_reg(table->regs, table->num_regs, addr); - if (reg != NULL) - return reg; - } - } - - return NULL; + return reg; } /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ @@ -1128,8 +1205,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, static bool check_cmd(const struct intel_engine_cs *engine, const struct drm_i915_cmd_descriptor *desc, - const u32 *cmd, u32 length, - const bool is_master) + const u32 *cmd, u32 length) { if (desc->flags & CMD_DESC_SKIP) return true; @@ -1139,12 +1215,6 @@ static bool check_cmd(const struct intel_engine_cs *engine, return false; } - if ((desc->flags & CMD_DESC_MASTER) && !is_master) { - DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", - *cmd); - return false; - } - if (desc->flags & CMD_DESC_REGISTER) { /* * Get the distance between individual register offset @@ -1158,7 +1228,7 @@ static bool check_cmd(const struct intel_engine_cs *engine, offset += step) { const u32 reg_addr = cmd[offset] & desc->reg.mask; const struct drm_i915_reg_descriptor *reg = - find_reg(engine, is_master, reg_addr); + find_reg(engine, reg_addr); if (!reg) { DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", @@ -1236,16 +1306,112 @@ static bool check_cmd(const struct intel_engine_cs *engine, return true; } +static int check_bbstart(const struct i915_gem_context *ctx, + u32 *cmd, u32 offset, u32 length, + u32 batch_len, + u64 batch_start, + u64 shadow_batch_start) +{ + u64 jump_offset, jump_target; + u32 target_cmd_offset, target_cmd_index; + + /* For igt compatibility on older platforms */ + if (CMDPARSER_USES_GGTT(ctx->i915)) { + DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n"); + return -EACCES; + } + + if (length != 3) { + DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n", + length); + return -EINVAL; + } + + jump_target = *(u64*)(cmd+1); + jump_offset = jump_target - batch_start; + + /* + * Any underflow of jump_target is guaranteed to be outside the range + * of a u32, so >= test catches both too large and too small + */ + if (jump_offset >= batch_len) { + DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n", + jump_target); + return -EINVAL; + } + + /* + * This cannot overflow a u32 because we already checked jump_offset + * is within the BB, and the batch_len is a u32 + */ + target_cmd_offset = lower_32_bits(jump_offset); + target_cmd_index = target_cmd_offset / sizeof(u32); + + *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset; + + if (target_cmd_index == offset) + return 0; + + if (ctx->jump_whitelist_cmds <= target_cmd_index) { + DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n"); + return -EINVAL; + } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) { + DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", + jump_target); + return -EINVAL; + } + + return 0; +} + +static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len) +{ + const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32)); + const u32 exact_size = BITS_TO_LONGS(batch_cmds); + u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds)); + unsigned long *next_whitelist; + + if (CMDPARSER_USES_GGTT(ctx->i915)) + return; + + if (batch_cmds <= ctx->jump_whitelist_cmds) { + bitmap_zero(ctx->jump_whitelist, batch_cmds); + return; + } + +again: + next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL); + if (next_whitelist) { + kfree(ctx->jump_whitelist); + ctx->jump_whitelist = next_whitelist; + ctx->jump_whitelist_cmds = + next_size * BITS_PER_BYTE * sizeof(long); + return; + } + + if (next_size > exact_size) { + next_size = exact_size; + goto again; + } + + DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n"); + bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds); + + return; +} + #define LENGTH_BIAS 2 /** * i915_parse_cmds() - parse a submitted batch buffer for privilege violations + * @ctx: the context in which the batch is to execute * @engine: the engine on which the batch is to execute * @batch_obj: the batch buffer in question - * @shadow_batch_obj: copy of the batch buffer in question + * @batch_start: Canonical base address of batch * @batch_start_offset: byte offset in the batch at which execution starts * @batch_len: length of the commands in batch_obj - * @is_master: is the submitting process the drm master? + * @shadow_batch_obj: copy of the batch buffer in question + * @shadow_batch_start: Canonical base address of shadow_batch_obj * * Parses the specified batch buffer looking for privilege violations as * described in the overview. @@ -1253,14 +1419,17 @@ static bool check_cmd(const struct intel_engine_cs *engine, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ -int intel_engine_cmd_parser(struct intel_engine_cs *engine, + +int intel_engine_cmd_parser(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, + u64 batch_start, u32 batch_start_offset, u32 batch_len, - bool is_master) + struct drm_i915_gem_object *shadow_batch_obj, + u64 shadow_batch_start) { - u32 *cmd, *batch_end; + u32 *cmd, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = noop_desc; const struct drm_i915_cmd_descriptor *desc = &default_desc; bool needs_clflush_after = false; @@ -1274,6 +1443,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, return PTR_ERR(cmd); } + init_whitelist(ctx, batch_len); + /* * We use the batch length as size because the shadow object is as * large or larger and copy_batch() will write MI_NOPs to the extra @@ -1283,31 +1454,15 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, do { u32 length; - if (*cmd == MI_BATCH_BUFFER_END) { - if (needs_clflush_after) { - void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); - drm_clflush_virt_range(ptr, - (void *)(cmd + 1) - ptr); - } + if (*cmd == MI_BATCH_BUFFER_END) break; - } desc = find_cmd(engine, *cmd, desc, &default_desc); if (!desc) { DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", *cmd); ret = -EINVAL; - break; - } - - /* - * If the batch buffer contains a chained batch, return an - * error that tells the caller to abort and dispatch the - * workload as a non-secure batch. - */ - if (desc->cmd.value == MI_BATCH_BUFFER_START) { - ret = -EACCES; - break; + goto err; } if (desc->flags & CMD_DESC_FIXED) @@ -1321,22 +1476,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, length, batch_end - cmd); ret = -EINVAL; - break; + goto err; } - if (!check_cmd(engine, desc, cmd, length, is_master)) { + if (!check_cmd(engine, desc, cmd, length)) { ret = -EACCES; + goto err; + } + + if (desc->cmd.value == MI_BATCH_BUFFER_START) { + ret = check_bbstart(ctx, cmd, offset, length, + batch_len, batch_start, + shadow_batch_start); + + if (ret) + goto err; break; } + if (ctx->jump_whitelist_cmds > offset) + set_bit(offset, ctx->jump_whitelist); + cmd += length; + offset += length; if (cmd >= batch_end) { DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); ret = -EINVAL; - break; + goto err; } } while (1); + if (needs_clflush_after) { + void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); + + drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); + } + +err: i915_gem_object_unpin_map(shadow_batch_obj); return ret; } @@ -1357,7 +1533,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) /* If the command parser is not enabled, report 0 - unsupported */ for_each_uabi_engine(engine, dev_priv) { - if (intel_engine_needs_cmd_parser(engine)) { + if (intel_engine_using_cmd_parser(engine)) { active = true; break; } @@ -1382,6 +1558,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) * the parser enabled. * 9. Don't whitelist or handle oacontrol specially, as ownership * for oacontrol state is moving to i915-perf. + * 10. Support for Gen9 BCS Parsing */ - return 9; + return 10; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ccb5b566795f..87e05ca3646a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -63,6 +63,7 @@ #include "gem/i915_gem_ioctls.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_rc6.h" #include "i915_debugfs.h" #include "i915_drv.h" @@ -1819,6 +1820,8 @@ static int i915_drm_resume(struct drm_device *dev) disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); + intel_rc6_ctx_wa_resume(&dev_priv->gt.rc6); + intel_gt_sanitize(&dev_priv->gt, true); ret = i915_ggtt_enable_hw(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0567fe67e8c5..e29bc137e7ba 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1614,9 +1614,16 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define VEBOX_MASK(dev_priv) \ ENGINE_INSTANCES_MASK(dev_priv, VECS0, I915_MAX_VECS) +/* + * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution + * All later gens can run the final buffer from the ppgtt + */ +#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN(dev_priv, 7) + #define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc) #define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop) #define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb) +#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6) #define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \ IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv)) @@ -1649,10 +1656,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, /* Early gen2 have a totally busted CS tlb and require pinned batches. */ #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) +#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \ + (IS_BROADWELL(dev_priv) || IS_GEN(dev_priv, 9)) + /* WaRsDisableCoarsePowerGating:skl,cnl */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_CANNONLAKE(dev_priv) || \ - IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) + (IS_CANNONLAKE(dev_priv) || IS_GEN(dev_priv, 9)) #define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4) #define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \ @@ -1834,6 +1843,14 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj, unsigned long flags); #define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0) +struct i915_vma * __must_check +i915_gem_object_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view, + u64 size, + u64 alignment, + u64 flags); + void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv); static inline int __must_check @@ -1939,12 +1956,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); -int intel_engine_cmd_parser(struct intel_engine_cs *engine, +int intel_engine_cmd_parser(struct i915_gem_context *cxt, + struct intel_engine_cs *engine, struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, + u64 user_batch_start, u32 batch_start_offset, u32 batch_len, - bool is_master); + struct drm_i915_gem_object *shadow_batch_obj, + u64 shadow_batch_start); /* intel_device_info.c */ static inline struct intel_device_info * @@ -2026,4 +2045,10 @@ i915_coherent_map_type(struct drm_i915_private *i915) return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC; } +static inline bool intel_guc_submission_is_enabled(struct intel_guc *guc) +{ + return intel_guc_is_submission_supported(guc) && + intel_guc_is_running(guc); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ac23322fb382..b9eb6b3149b7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -893,6 +893,20 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_address_space *vm = &dev_priv->ggtt.vm; + + return i915_gem_object_pin(obj, vm, view, size, alignment, + flags | PIN_GLOBAL); +} + +struct i915_vma * +i915_gem_object_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view, + u64 size, + u64 alignment, + u64 flags) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; int ret; @@ -958,7 +972,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(ret); } - ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); + ret = i915_vma_pin(vma, size, alignment, flags); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 88179202c556..6239a9adbf14 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2609,8 +2609,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; - ppgtt->vm.total = ggtt->vm.total; - return 0; err_ppgtt: diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index ad33fbe90a28..cf8a8c3ef047 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -63,7 +63,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, value = !!(i915->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES); break; case I915_PARAM_HAS_SECURE_BATCHES: - value = capable(CAP_SYS_ADMIN); + value = HAS_SECURE_BATCHES(i915) && capable(CAP_SYS_ADMIN); break; case I915_PARAM_CMD_PARSER_VERSION: value = i915_cmd_parser_get_version(i915); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index e8b67f5e521d..3c85cb0ee99f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1029,9 +1029,9 @@ i915_error_object_create(struct drm_i915_private *i915, for_each_sgt_daddr(dma, iter, vma->pages) { void __iomem *s; - s = io_mapping_map_atomic_wc(&mem->iomap, dma); + s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE); ret = compress_page(compress, (void __force *)s, dst); - io_mapping_unmap_atomic(s); + io_mapping_unmap(s); if (ret) break; } @@ -1043,9 +1043,9 @@ i915_error_object_create(struct drm_i915_private *i915, drm_clflush_pages(&page, 1); - s = kmap_atomic(page); + s = kmap(page); ret = compress_page(compress, s, dst); - kunmap_atomic(s); + kunmap(s); drm_clflush_pages(&page, 1); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a8c2318d3d5e..65d7c2e599de 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1870,7 +1870,7 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, config_length += num_lri_dwords(oa_config->mux_regs_len); config_length += num_lri_dwords(oa_config->b_counter_regs_len); config_length += num_lri_dwords(oa_config->flex_regs_len); - config_length++; /* MI_BATCH_BUFFER_END */ + config_length += 3; /* MI_BATCH_BUFFER_START */ config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE); obj = i915_gem_object_create_shmem(stream->perf->i915, config_length); @@ -1895,7 +1895,12 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, oa_config->flex_regs, oa_config->flex_regs_len); - *cs++ = MI_BATCH_BUFFER_END; + /* Jump into the active wait. */ + *cs++ = (INTEL_GEN(stream->perf->i915) < 8 ? + MI_BATCH_BUFFER_START : + MI_BATCH_BUFFER_START_GEN8); + *cs++ = i915_ggtt_offset(stream->noa_wait); + *cs++ = 0; i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); @@ -3307,15 +3312,6 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, } } - if (props->hold_preemption) { - if (!props->single_context) { - DRM_DEBUG("preemption disable with no context\n"); - ret = -EINVAL; - goto err; - } - privileged_op = true; - } - /* * On Haswell the OA unit supports clock gating off for a specific * context and in this mode there's no visibility of metrics for the @@ -3335,12 +3331,21 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, * doesn't request global stream access (i.e. query based sampling * using MI_RECORD_PERF_COUNT. */ - if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption) + if (IS_HASWELL(perf->i915) && specific_ctx) privileged_op = false; else if (IS_GEN(perf->i915, 12) && specific_ctx && (props->sample_flags & SAMPLE_OA_REPORT) == 0) privileged_op = false; + if (props->hold_preemption) { + if (!props->single_context) { + DRM_DEBUG("preemption disable with no context\n"); + ret = -EINVAL; + goto err; + } + privileged_op = true; + } + /* Similar to perf's kernel.perf_paranoid_cpu sysctl option * we check a dev.i915.perf_stream_paranoid sysctl option * to determine if it's ok to access system wide OA counters diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 05395015d1f2..2814218c5ba1 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -190,7 +190,7 @@ static u64 get_rc6(struct intel_gt *gt) val = 0; if (intel_gt_pm_get_if_awake(gt)) { val = __get_rc6(gt); - intel_gt_pm_put(gt); + intel_gt_pm_put_async(gt); } spin_lock_irqsave(&pmu->lock, flags); @@ -343,7 +343,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) skip: spin_unlock_irqrestore(&engine->uncore->lock, flags); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); } } @@ -368,7 +368,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) if (intel_gt_pm_get_if_awake(gt)) { val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1); val = intel_get_cagf(rps, val); - intel_gt_pm_put(gt); + intel_gt_pm_put_async(gt); } add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], @@ -878,8 +878,8 @@ create_event_attributes(struct i915_pmu *pmu) const char *name; const char *unit; } events[] = { - __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"), - __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"), + __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"), + __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"), __event(I915_PMU_INTERRUPTS, "interrupts", NULL), __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"), }; diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index c27cfef9281c..ef25ce6e395e 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -103,15 +103,18 @@ query_engine_info(struct drm_i915_private *i915, struct drm_i915_engine_info __user *info_ptr; struct drm_i915_query_engine_info query; struct drm_i915_engine_info info = { }; + unsigned int num_uabi_engines = 0; struct intel_engine_cs *engine; int len, ret; if (query_item->flags) return -EINVAL; + for_each_uabi_engine(engine, i915) + num_uabi_engines++; + len = sizeof(struct drm_i915_query_engine_info) + - RUNTIME_INFO(i915)->num_engines * - sizeof(struct drm_i915_engine_info); + num_uabi_engines * sizeof(struct drm_i915_engine_info); ret = copy_query_item(&query, sizeof(query), len, query_item); if (ret != 0) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 53c280c4e741..73079b503724 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -474,6 +474,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ECOCHK_PPGTT_WT_HSW (0x2 << 3) #define ECOCHK_PPGTT_WB_HSW (0x3 << 3) +#define GEN8_RC6_CTX_INFO _MMIO(0x8504) + #define GAC_ECO_BITS _MMIO(0x14090) #define ECOBITS_SNB_BIT (1 << 13) #define ECOBITS_PPGTT_CACHE64B (3 << 8) @@ -560,6 +562,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) */ #define BCS_SWCTRL _MMIO(0x22200) +/* There are 16 GPR registers */ +#define BCS_GPR(n) _MMIO(0x22600 + (n) * 8) +#define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4) + #define GPGPU_THREADS_DISPATCHED _MMIO(0x2290) #define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4) #define HS_INVOCATION_COUNT _MMIO(0x2300) @@ -7353,6 +7359,10 @@ enum { #define DMC_DEBUG3 _MMIO(0x101090) +/* Display Internal Timeout Register */ +#define RM_TIMEOUT _MMIO(0x42060) +#define MMIO_TIMEOUT_US(us) ((us) << 0) + /* interrupts */ #define DE_MASTER_IRQ_CONTROL (1 << 31) #define DE_SPRITEB_FLIP_DONE (1 << 29) @@ -9661,7 +9671,7 @@ enum skl_power_gate { #define TRANS_DDI_EDP_INPUT_A_ONOFF (4 << 12) #define TRANS_DDI_EDP_INPUT_B_ONOFF (5 << 12) #define TRANS_DDI_EDP_INPUT_C_ONOFF (6 << 12) -#define TRANS_DDI_MST_TRANSPORT_SELECT_MASK REG_GENMASK(12, 10) +#define TRANS_DDI_MST_TRANSPORT_SELECT_MASK REG_GENMASK(11, 10) #define TRANS_DDI_MST_TRANSPORT_SELECT(trans) \ REG_FIELD_PREP(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, trans) #define TRANS_DDI_HDCP_SIGNALLING (1 << 9) diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index 000ba43e2c02..8fd92b9130a7 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -62,7 +62,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) /* KBP is SPT compatible */ return PCH_SPT; case INTEL_PCH_CNP_DEVICE_ID_TYPE: - case INTEL_PCH_CNP2_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n"); WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); return PCH_CNP; @@ -76,6 +75,11 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) WARN_ON(!IS_COFFEELAKE(dev_priv)); /* CometPoint is CNP Compatible */ return PCH_CNP; + case INTEL_PCH_CMP_V_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found Comet Lake V PCH (CMP-V)\n"); + WARN_ON(!IS_COFFEELAKE(dev_priv)); + /* Comet Lake V PCH is based on KBP, which is SPT compatible */ + return PCH_SPT; case INTEL_PCH_ICP_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Ice Lake PCH\n"); WARN_ON(!IS_ICELAKE(dev_priv)); diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index 1115c6a0522c..d26c25dd8d54 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -40,10 +40,10 @@ enum intel_pch { #define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 #define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA280 #define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 -#define INTEL_PCH_CNP2_DEVICE_ID_TYPE 0xA380 #define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 #define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280 #define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 +#define INTEL_PCH_CMP_V_DEVICE_ID_TYPE 0xA380 #define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 #define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 #define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5d2b460d3ee5..809bff955b5a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -107,6 +107,14 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) */ I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | PWM1_GATING_DIS | PWM2_GATING_DIS); + + /* + * Lower the display internal timeout. + * This is needed to avoid any hard hangs when DSI port PLL + * is off and a MMIO access is attempted by any privilege + * application, using batch buffers or any other means. + */ + I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950)); } static void glk_init_clock_gating(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 868cc78048d0..59aa1b6f1827 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -54,7 +54,8 @@ int __intel_wakeref_get_first(struct intel_wakeref *wf) static void ____intel_wakeref_put_last(struct intel_wakeref *wf) { - if (!atomic_dec_and_test(&wf->count)) + INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); + if (unlikely(!atomic_dec_and_test(&wf->count))) goto unlock; /* ops->put() must reschedule its own release on error/deferral */ @@ -67,13 +68,12 @@ unlock: mutex_unlock(&wf->mutex); } -void __intel_wakeref_put_last(struct intel_wakeref *wf) +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags) { INTEL_WAKEREF_BUG_ON(work_pending(&wf->work)); /* Assume we are not in process context and so cannot sleep. */ - if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC || - !mutex_trylock(&wf->mutex)) { + if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) { schedule_work(&wf->work); return; } @@ -109,8 +109,17 @@ void __intel_wakeref_init(struct intel_wakeref *wf, int intel_wakeref_wait_for_idle(struct intel_wakeref *wf) { - return wait_var_event_killable(&wf->wakeref, - !intel_wakeref_is_active(wf)); + int err; + + might_sleep(); + + err = wait_var_event_killable(&wf->wakeref, + !intel_wakeref_is_active(wf)); + if (err) + return err; + + intel_wakeref_unlock_wait(wf); + return 0; } static void wakeref_auto_timeout(struct timer_list *t) diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index 5f0c972a80fb..da6e8fd506e6 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -9,6 +9,7 @@ #include <linux/atomic.h> #include <linux/bits.h> +#include <linux/lockdep.h> #include <linux/mutex.h> #include <linux/refcount.h> #include <linux/stackdepot.h> @@ -29,9 +30,6 @@ typedef depot_stack_handle_t intel_wakeref_t; struct intel_wakeref_ops { int (*get)(struct intel_wakeref *wf); int (*put)(struct intel_wakeref *wf); - - unsigned long flags; -#define INTEL_WAKEREF_PUT_ASYNC BIT(0) }; struct intel_wakeref { @@ -57,7 +55,7 @@ void __intel_wakeref_init(struct intel_wakeref *wf, } while (0) int __intel_wakeref_get_first(struct intel_wakeref *wf); -void __intel_wakeref_put_last(struct intel_wakeref *wf); +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags); /** * intel_wakeref_get: Acquire the wakeref @@ -100,10 +98,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf) } /** - * intel_wakeref_put: Release the wakeref - * @i915: the drm_i915_private device + * intel_wakeref_put_flags: Release the wakeref * @wf: the wakeref - * @fn: callback for releasing the wakeref, called only on final release. + * @flags: control flags * * Release our hold on the wakeref. When there are no more users, * the runtime pm wakeref will be released after the @fn callback is called @@ -116,11 +113,25 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf) * code otherwise. */ static inline void -intel_wakeref_put(struct intel_wakeref *wf) +__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags) +#define INTEL_WAKEREF_PUT_ASYNC BIT(0) { INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); if (unlikely(!atomic_add_unless(&wf->count, -1, 1))) - __intel_wakeref_put_last(wf); + __intel_wakeref_put_last(wf, flags); +} + +static inline void +intel_wakeref_put(struct intel_wakeref *wf) +{ + might_sleep(); + __intel_wakeref_put(wf, 0); +} + +static inline void +intel_wakeref_put_async(struct intel_wakeref *wf) +{ + __intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC); } /** @@ -152,6 +163,21 @@ intel_wakeref_unlock(struct intel_wakeref *wf) } /** + * intel_wakeref_unlock_wait: Wait until the active callback is complete + * @wf: the wakeref + * + * Waits for the active callback (under the @wf->mutex or another CPU) is + * complete. + */ +static inline void +intel_wakeref_unlock_wait(struct intel_wakeref *wf) +{ + mutex_lock(&wf->mutex); + mutex_unlock(&wf->mutex); + flush_work(&wf->work); +} + +/** * intel_wakeref_is_active: Query whether the wakeref is currently held * @wf: the wakeref * @@ -170,6 +196,7 @@ intel_wakeref_is_active(const struct intel_wakeref *wf) static inline void __intel_wakeref_defer_park(struct intel_wakeref *wf) { + lockdep_assert_held(&wf->mutex); INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count)); atomic_set_release(&wf->count, 1); } diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index e9160ce39cbb..6deaa7d01654 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -7,6 +7,7 @@ config DRM_MSM depends on OF && COMMON_CLK depends on MMU depends on INTERCONNECT || !INTERCONNECT + depends on QCOM_OCMEM || QCOM_OCMEM=n select QCOM_MDT_LOADER if ARCH_QCOM select REGULATOR select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 5f7e98028eaf..7ad14937fcdf 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -6,10 +6,6 @@ * Copyright (c) 2014 The Linux Foundation. All rights reserved. */ -#ifdef CONFIG_MSM_OCMEM -# include <mach/ocmem.h> -#endif - #include "a3xx_gpu.h" #define A3XX_INT0_MASK \ @@ -195,9 +191,9 @@ static int a3xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000); /* Set the OCMEM base address for A330, etc */ - if (a3xx_gpu->ocmem_hdl) { + if (a3xx_gpu->ocmem.hdl) { gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR, - (unsigned int)(a3xx_gpu->ocmem_base >> 14)); + (unsigned int)(a3xx_gpu->ocmem.base >> 14)); } /* Turn on performance counters: */ @@ -318,10 +314,7 @@ static void a3xx_destroy(struct msm_gpu *gpu) adreno_gpu_cleanup(adreno_gpu); -#ifdef CONFIG_MSM_OCMEM - if (a3xx_gpu->ocmem_base) - ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl); -#endif + adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem); kfree(a3xx_gpu); } @@ -494,17 +487,10 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) /* if needed, allocate gmem: */ if (adreno_is_a330(adreno_gpu)) { -#ifdef CONFIG_MSM_OCMEM - /* TODO this is different/missing upstream: */ - struct ocmem_buf *ocmem_hdl = - ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem); - - a3xx_gpu->ocmem_hdl = ocmem_hdl; - a3xx_gpu->ocmem_base = ocmem_hdl->addr; - adreno_gpu->gmem = ocmem_hdl->len; - DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024, - a3xx_gpu->ocmem_base); -#endif + ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev, + adreno_gpu, &a3xx_gpu->ocmem); + if (ret) + goto fail; } if (!gpu->aspace) { diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h index 5dc33e5ea53b..c555fb13e0d7 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h @@ -19,8 +19,7 @@ struct a3xx_gpu { struct adreno_gpu base; /* if OCMEM is used for GMEM: */ - uint32_t ocmem_base; - void *ocmem_hdl; + struct adreno_ocmem ocmem; }; #define to_a3xx_gpu(x) container_of(x, struct a3xx_gpu, base) diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index ab2b752566d8..b01388a9e89e 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -2,9 +2,6 @@ /* Copyright (c) 2014 The Linux Foundation. All rights reserved. */ #include "a4xx_gpu.h" -#ifdef CONFIG_MSM_OCMEM -# include <soc/qcom/ocmem.h> -#endif #define A4XX_INT0_MASK \ (A4XX_INT0_RBBM_AHB_ERROR | \ @@ -188,7 +185,7 @@ static int a4xx_hw_init(struct msm_gpu *gpu) (1 << 30) | 0xFFFF); gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR, - (unsigned int)(a4xx_gpu->ocmem_base >> 14)); + (unsigned int)(a4xx_gpu->ocmem.base >> 14)); /* Turn on performance counters: */ gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01); @@ -318,10 +315,7 @@ static void a4xx_destroy(struct msm_gpu *gpu) adreno_gpu_cleanup(adreno_gpu); -#ifdef CONFIG_MSM_OCMEM - if (a4xx_gpu->ocmem_base) - ocmem_free(OCMEM_GRAPHICS, a4xx_gpu->ocmem_hdl); -#endif + adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem); kfree(a4xx_gpu); } @@ -578,17 +572,10 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) /* if needed, allocate gmem: */ if (adreno_is_a4xx(adreno_gpu)) { -#ifdef CONFIG_MSM_OCMEM - /* TODO this is different/missing upstream: */ - struct ocmem_buf *ocmem_hdl = - ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem); - - a4xx_gpu->ocmem_hdl = ocmem_hdl; - a4xx_gpu->ocmem_base = ocmem_hdl->addr; - adreno_gpu->gmem = ocmem_hdl->len; - DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024, - a4xx_gpu->ocmem_base); -#endif + ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu, + &a4xx_gpu->ocmem); + if (ret) + goto fail; } if (!gpu->aspace) { diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.h b/drivers/gpu/drm/msm/adreno/a4xx_gpu.h index d506311ee240..a01448cba2ea 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.h @@ -16,8 +16,7 @@ struct a4xx_gpu { struct adreno_gpu base; /* if OCMEM is used for GMEM: */ - uint32_t ocmem_base; - void *ocmem_hdl; + struct adreno_ocmem ocmem; }; #define to_a4xx_gpu(x) container_of(x, struct a4xx_gpu, base) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index e9c55d1d6c04..b02e2042547f 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -353,6 +353,9 @@ static int a5xx_me_init(struct msm_gpu *gpu) * 2D mode 3 draw */ OUT_RING(ring, 0x0000000B); + } else if (adreno_is_a510(adreno_gpu)) { + /* Workaround for token and syncs */ + OUT_RING(ring, 0x00000001); } else { /* No workarounds enabled */ OUT_RING(ring, 0x00000000); @@ -568,15 +571,24 @@ static int a5xx_hw_init(struct msm_gpu *gpu) 0x00100000 + adreno_gpu->gmem - 1); gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000); - gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40); - if (adreno_is_a530(adreno_gpu)) - gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40); - if (adreno_is_a540(adreno_gpu)) - gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400); - gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060); - gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16); - - gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22)); + if (adreno_is_a510(adreno_gpu)) { + gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20); + gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20); + gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030); + gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A); + gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, + (0x200 << 11 | 0x200 << 22)); + } else { + gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40); + if (adreno_is_a530(adreno_gpu)) + gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40); + if (adreno_is_a540(adreno_gpu)) + gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400); + gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060); + gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16); + gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, + (0x400 << 11 | 0x300 << 22)); + } if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI) gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8)); @@ -589,6 +601,19 @@ static int a5xx_hw_init(struct msm_gpu *gpu) /* Enable ME/PFP split notification */ gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF); + /* + * In A5x, CCU can send context_done event of a particular context to + * UCHE which ultimately reaches CP even when there is valid + * transaction of that context inside CCU. This can let CP to program + * config registers, which will make the "valid transaction" inside + * CCU to be interpreted differently. This can cause gpu fault. This + * bug is fixed in latest A510 revision. To enable this bug fix - + * bit[11] of RB_DBG_ECO_CNTL need to be set to 0, default is 1 + * (disable). For older A510 version this bit is unused. + */ + if (adreno_is_a510(adreno_gpu)) + gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0); + /* Enable HWCG */ a5xx_set_hwcg(gpu, true); @@ -635,7 +660,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) /* UCHE */ gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16)); - if (adreno_is_a530(adreno_gpu)) + if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu)) gpu_write(gpu, REG_A5XX_CP_PROTECT(17), ADRENO_PROTECT_RW(0x10000, 0x8000)); @@ -679,7 +704,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu) a5xx_preempt_hw_init(gpu); - a5xx_gpmu_ucode_init(gpu); + if (!adreno_is_a510(adreno_gpu)) + a5xx_gpmu_ucode_init(gpu); ret = a5xx_ucode_init(gpu); if (ret) @@ -712,7 +738,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu) } /* - * Try to load a zap shader into the secure world. If successful + * If the chip that we are using does support loading one, then + * try to load a zap shader into the secure world. If successful * we can use the CP to switch out of secure mode. If not then we * have no resource but to try to switch ourselves out manually. If we * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will @@ -1066,6 +1093,7 @@ static void a5xx_dump(struct msm_gpu *gpu) static int a5xx_pm_resume(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); int ret; /* Turn on the core power */ @@ -1073,6 +1101,15 @@ static int a5xx_pm_resume(struct msm_gpu *gpu) if (ret) return ret; + if (adreno_is_a510(adreno_gpu)) { + /* Halt the sp_input_clk at HM level */ + gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055); + a5xx_set_hwcg(gpu, true); + /* Turn on sp_input_clk at HM level */ + gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0); + return 0; + } + /* Turn the RBCCU domain first to limit the chances of voltage droop */ gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000); @@ -1101,9 +1138,17 @@ static int a5xx_pm_resume(struct msm_gpu *gpu) static int a5xx_pm_suspend(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + u32 mask = 0xf; + + /* A510 has 3 XIN ports in VBIF */ + if (adreno_is_a510(adreno_gpu)) + mask = 0x7; + /* Clear the VBIF pipe before shutting down */ - gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF); - spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF); + gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask); + spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & + mask) == mask); gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0); @@ -1289,7 +1334,7 @@ static void a5xx_gpu_state_destroy(struct kref *kref) kfree(a5xx_state); } -int a5xx_gpu_state_put(struct msm_gpu_state *state) +static int a5xx_gpu_state_put(struct msm_gpu_state *state) { if (IS_ERR_OR_NULL(state)) return 1; @@ -1299,8 +1344,8 @@ int a5xx_gpu_state_put(struct msm_gpu_state *state) #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) -void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, - struct drm_printer *p) +static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, + struct drm_printer *p) { int i, j; u32 pos = 0; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index a3a06db675ba..321a8061fd32 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -297,6 +297,10 @@ int a5xx_power_init(struct msm_gpu *gpu) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); int ret; + /* Not all A5xx chips have a GPMU */ + if (adreno_is_a510(adreno_gpu)) + return 0; + /* Set up the limits management */ if (adreno_is_a530(adreno_gpu)) a530_lm_setup(gpu); @@ -326,6 +330,9 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) unsigned int *data, *ptr, *cmds; unsigned int cmds_size; + if (adreno_is_a510(adreno_gpu)) + return; + if (a5xx_gpu->gpmu_bo) return; diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 0888e0df660d..fbbdf86504f5 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -115,6 +115,21 @@ static const struct adreno_info gpulist[] = { .inactive_period = DRM_MSM_INACTIVE_PERIOD, .init = a4xx_gpu_init, }, { + .rev = ADRENO_REV(5, 1, 0, ANY_ID), + .revn = 510, + .name = "A510", + .fw = { + [ADRENO_FW_PM4] = "a530_pm4.fw", + [ADRENO_FW_PFP] = "a530_pfp.fw", + }, + .gmem = SZ_256K, + /* + * Increase inactive period to 250 to avoid bouncing + * the GDSC which appears to make it grumpy + */ + .inactive_period = 250, + .init = a5xx_gpu_init, + }, { .rev = ADRENO_REV(5, 3, 0, 2), .revn = 530, .name = "A530", diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 048c8be426f3..0783e4b5486a 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -14,6 +14,7 @@ #include <linux/pm_opp.h> #include <linux/slab.h> #include <linux/soc/qcom/mdt_loader.h> +#include <soc/qcom/ocmem.h> #include "adreno_gpu.h" #include "msm_gem.h" #include "msm_mmu.h" @@ -893,6 +894,45 @@ static int adreno_get_pwrlevels(struct device *dev, return 0; } +int adreno_gpu_ocmem_init(struct device *dev, struct adreno_gpu *adreno_gpu, + struct adreno_ocmem *adreno_ocmem) +{ + struct ocmem_buf *ocmem_hdl; + struct ocmem *ocmem; + + ocmem = of_get_ocmem(dev); + if (IS_ERR(ocmem)) { + if (PTR_ERR(ocmem) == -ENODEV) { + /* + * Return success since either the ocmem property was + * not specified in device tree, or ocmem support is + * not compiled into the kernel. + */ + return 0; + } + + return PTR_ERR(ocmem); + } + + ocmem_hdl = ocmem_allocate(ocmem, OCMEM_GRAPHICS, adreno_gpu->gmem); + if (IS_ERR(ocmem_hdl)) + return PTR_ERR(ocmem_hdl); + + adreno_ocmem->ocmem = ocmem; + adreno_ocmem->base = ocmem_hdl->addr; + adreno_ocmem->hdl = ocmem_hdl; + adreno_gpu->gmem = ocmem_hdl->len; + + return 0; +} + +void adreno_gpu_ocmem_cleanup(struct adreno_ocmem *adreno_ocmem) +{ + if (adreno_ocmem && adreno_ocmem->base) + ocmem_free(adreno_ocmem->ocmem, OCMEM_GRAPHICS, + adreno_ocmem->hdl); +} + int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs, int nr_rings) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index c7441fb8313e..e71a7570ef72 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -126,6 +126,12 @@ struct adreno_gpu { }; #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base) +struct adreno_ocmem { + struct ocmem *ocmem; + unsigned long base; + void *hdl; +}; + /* platform config data (ie. from DT, or pdata) */ struct adreno_platform_config { struct adreno_rev rev; @@ -206,6 +212,11 @@ static inline int adreno_is_a430(struct adreno_gpu *gpu) return gpu->revn == 430; } +static inline int adreno_is_a510(struct adreno_gpu *gpu) +{ + return gpu->revn == 510; +} + static inline int adreno_is_a530(struct adreno_gpu *gpu) { return gpu->revn == 530; @@ -236,6 +247,10 @@ void adreno_dump(struct msm_gpu *gpu); void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords); struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu); +int adreno_gpu_ocmem_init(struct device *dev, struct adreno_gpu *adreno_gpu, + struct adreno_ocmem *ocmem); +void adreno_gpu_ocmem_cleanup(struct adreno_ocmem *ocmem); + int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, int nr_rings); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c index cdbea38b8697..f1bc6a1af7a7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c @@ -55,8 +55,7 @@ static void dpu_core_irq_callback_handler(void *arg, int irq_idx) int dpu_core_irq_idx_lookup(struct dpu_kms *dpu_kms, enum dpu_intr_type intr_type, u32 instance_idx) { - if (!dpu_kms || !dpu_kms->hw_intr || - !dpu_kms->hw_intr->ops.irq_idx_lookup) + if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.irq_idx_lookup) return -EINVAL; return dpu_kms->hw_intr->ops.irq_idx_lookup(intr_type, @@ -73,7 +72,7 @@ static int _dpu_core_irq_enable(struct dpu_kms *dpu_kms, int irq_idx) unsigned long irq_flags; int ret = 0, enable_count; - if (!dpu_kms || !dpu_kms->hw_intr || + if (!dpu_kms->hw_intr || !dpu_kms->irq_obj.enable_counts || !dpu_kms->irq_obj.irq_counts) { DPU_ERROR("invalid params\n"); @@ -114,7 +113,7 @@ int dpu_core_irq_enable(struct dpu_kms *dpu_kms, int *irq_idxs, u32 irq_count) { int i, ret = 0, counts; - if (!dpu_kms || !irq_idxs || !irq_count) { + if (!irq_idxs || !irq_count) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -138,7 +137,7 @@ static int _dpu_core_irq_disable(struct dpu_kms *dpu_kms, int irq_idx) { int ret = 0, enable_count; - if (!dpu_kms || !dpu_kms->hw_intr || !dpu_kms->irq_obj.enable_counts) { + if (!dpu_kms->hw_intr || !dpu_kms->irq_obj.enable_counts) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -169,7 +168,7 @@ int dpu_core_irq_disable(struct dpu_kms *dpu_kms, int *irq_idxs, u32 irq_count) { int i, ret = 0, counts; - if (!dpu_kms || !irq_idxs || !irq_count) { + if (!irq_idxs || !irq_count) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -186,7 +185,7 @@ int dpu_core_irq_disable(struct dpu_kms *dpu_kms, int *irq_idxs, u32 irq_count) u32 dpu_core_irq_read(struct dpu_kms *dpu_kms, int irq_idx, bool clear) { - if (!dpu_kms || !dpu_kms->hw_intr || + if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.get_interrupt_status) return 0; @@ -205,7 +204,7 @@ int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms, int irq_idx, { unsigned long irq_flags; - if (!dpu_kms || !dpu_kms->irq_obj.irq_cb_tbl) { + if (!dpu_kms->irq_obj.irq_cb_tbl) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -240,7 +239,7 @@ int dpu_core_irq_unregister_callback(struct dpu_kms *dpu_kms, int irq_idx, { unsigned long irq_flags; - if (!dpu_kms || !dpu_kms->irq_obj.irq_cb_tbl) { + if (!dpu_kms->irq_obj.irq_cb_tbl) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -274,8 +273,7 @@ int dpu_core_irq_unregister_callback(struct dpu_kms *dpu_kms, int irq_idx, static void dpu_clear_all_irqs(struct dpu_kms *dpu_kms) { - if (!dpu_kms || !dpu_kms->hw_intr || - !dpu_kms->hw_intr->ops.clear_all_irqs) + if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.clear_all_irqs) return; dpu_kms->hw_intr->ops.clear_all_irqs(dpu_kms->hw_intr); @@ -283,8 +281,7 @@ static void dpu_clear_all_irqs(struct dpu_kms *dpu_kms) static void dpu_disable_all_irqs(struct dpu_kms *dpu_kms) { - if (!dpu_kms || !dpu_kms->hw_intr || - !dpu_kms->hw_intr->ops.disable_all_irqs) + if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.disable_all_irqs) return; dpu_kms->hw_intr->ops.disable_all_irqs(dpu_kms->hw_intr); @@ -343,18 +340,8 @@ void dpu_debugfs_core_irq_init(struct dpu_kms *dpu_kms, void dpu_core_irq_preinstall(struct dpu_kms *dpu_kms) { - struct msm_drm_private *priv; int i; - if (!dpu_kms->dev) { - DPU_ERROR("invalid drm device\n"); - return; - } else if (!dpu_kms->dev->dev_private) { - DPU_ERROR("invalid device private\n"); - return; - } - priv = dpu_kms->dev->dev_private; - pm_runtime_get_sync(&dpu_kms->pdev->dev); dpu_clear_all_irqs(dpu_kms); dpu_disable_all_irqs(dpu_kms); @@ -379,18 +366,8 @@ void dpu_core_irq_preinstall(struct dpu_kms *dpu_kms) void dpu_core_irq_uninstall(struct dpu_kms *dpu_kms) { - struct msm_drm_private *priv; int i; - if (!dpu_kms->dev) { - DPU_ERROR("invalid drm device\n"); - return; - } else if (!dpu_kms->dev->dev_private) { - DPU_ERROR("invalid device private\n"); - return; - } - priv = dpu_kms->dev->dev_private; - pm_runtime_get_sync(&dpu_kms->pdev->dev); for (i = 0; i < dpu_kms->irq_obj.total_irqs; i++) if (atomic_read(&dpu_kms->irq_obj.enable_counts[i]) || diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c index 09a49b59bb5b..11f2bebe3869 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c @@ -32,18 +32,7 @@ enum dpu_perf_mode { static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc) { struct msm_drm_private *priv; - - if (!crtc->dev || !crtc->dev->dev_private) { - DPU_ERROR("invalid device\n"); - return NULL; - } - priv = crtc->dev->dev_private; - if (!priv || !priv->kms) { - DPU_ERROR("invalid kms\n"); - return NULL; - } - return to_dpu_kms(priv->kms); } @@ -116,7 +105,7 @@ int dpu_core_perf_crtc_check(struct drm_crtc *crtc, } kms = _dpu_crtc_get_kms(crtc); - if (!kms || !kms->catalog) { + if (!kms->catalog) { DPU_ERROR("invalid parameters\n"); return 0; } @@ -215,7 +204,6 @@ static int _dpu_core_perf_crtc_update_bus(struct dpu_kms *kms, void dpu_core_perf_crtc_release_bw(struct drm_crtc *crtc) { struct dpu_crtc *dpu_crtc; - struct dpu_crtc_state *dpu_cstate; struct dpu_kms *kms; if (!crtc) { @@ -224,13 +212,12 @@ void dpu_core_perf_crtc_release_bw(struct drm_crtc *crtc) } kms = _dpu_crtc_get_kms(crtc); - if (!kms || !kms->catalog) { + if (!kms->catalog) { DPU_ERROR("invalid kms\n"); return; } dpu_crtc = to_dpu_crtc(crtc); - dpu_cstate = to_dpu_crtc_state(crtc->state); if (atomic_dec_return(&kms->bandwidth_ref) > 0) return; @@ -287,7 +274,6 @@ int dpu_core_perf_crtc_update(struct drm_crtc *crtc, u64 clk_rate = 0; struct dpu_crtc *dpu_crtc; struct dpu_crtc_state *dpu_cstate; - struct msm_drm_private *priv; struct dpu_kms *kms; int ret; @@ -297,11 +283,10 @@ int dpu_core_perf_crtc_update(struct drm_crtc *crtc, } kms = _dpu_crtc_get_kms(crtc); - if (!kms || !kms->catalog) { + if (!kms->catalog) { DPU_ERROR("invalid kms\n"); return -EINVAL; } - priv = kms->dev->dev_private; dpu_crtc = to_dpu_crtc(crtc); dpu_cstate = to_dpu_crtc_state(crtc->state); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index ce59adff06aa..f197dce54576 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -266,11 +266,20 @@ enum dpu_intf_mode dpu_crtc_get_intf_mode(struct drm_crtc *crtc) { struct drm_encoder *encoder; - if (!crtc || !crtc->dev) { + if (!crtc) { DPU_ERROR("invalid crtc\n"); return INTF_MODE_NONE; } + /* + * TODO: This function is called from dpu debugfs and as part of atomic + * check. When called from debugfs, the crtc->mutex must be held to + * read crtc->state. However reading crtc->state from atomic check isn't + * allowed (unless you have a good reason, a big comment, and a deep + * understanding of how the atomic/modeset locks work (<- and this is + * probably not possible)). So we'll keep the WARN_ON here for now, but + * really we need to figure out a better way to track our operating mode + */ WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); /* TODO: Returns the first INTF_MODE, could there be multiple values? */ @@ -694,7 +703,7 @@ static void dpu_crtc_disable(struct drm_crtc *crtc, unsigned long flags; bool release_bandwidth = false; - if (!crtc || !crtc->dev || !crtc->dev->dev_private || !crtc->state) { + if (!crtc || !crtc->state) { DPU_ERROR("invalid crtc\n"); return; } @@ -766,7 +775,7 @@ static void dpu_crtc_enable(struct drm_crtc *crtc, struct msm_drm_private *priv; bool request_bandwidth; - if (!crtc || !crtc->dev || !crtc->dev->dev_private) { + if (!crtc) { DPU_ERROR("invalid crtc\n"); return; } @@ -1288,13 +1297,8 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane, { struct drm_crtc *crtc = NULL; struct dpu_crtc *dpu_crtc = NULL; - struct msm_drm_private *priv = NULL; - struct dpu_kms *kms = NULL; int i; - priv = dev->dev_private; - kms = to_dpu_kms(priv->kms); - dpu_crtc = kzalloc(sizeof(*dpu_crtc), GFP_KERNEL); if (!dpu_crtc) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index d82ea994063f..f96e142c4361 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -645,11 +645,6 @@ static void _dpu_encoder_update_vsync_source(struct dpu_encoder_virt *dpu_enc, priv = drm_enc->dev->dev_private; dpu_kms = to_dpu_kms(priv->kms); - if (!dpu_kms) { - DPU_ERROR("invalid dpu_kms\n"); - return; - } - hw_mdptop = dpu_kms->hw_mdp; if (!hw_mdptop) { DPU_ERROR("invalid mdptop\n"); @@ -735,8 +730,7 @@ static int dpu_encoder_resource_control(struct drm_encoder *drm_enc, struct msm_drm_private *priv; bool is_vid_mode = false; - if (!drm_enc || !drm_enc->dev || !drm_enc->dev->dev_private || - !drm_enc->crtc) { + if (!drm_enc || !drm_enc->dev || !drm_enc->crtc) { DPU_ERROR("invalid parameters\n"); return -EINVAL; } @@ -1092,17 +1086,13 @@ static void _dpu_encoder_virt_enable_helper(struct drm_encoder *drm_enc) struct msm_drm_private *priv; struct dpu_kms *dpu_kms; - if (!drm_enc || !drm_enc->dev || !drm_enc->dev->dev_private) { + if (!drm_enc || !drm_enc->dev) { DPU_ERROR("invalid parameters\n"); return; } priv = drm_enc->dev->dev_private; dpu_kms = to_dpu_kms(priv->kms); - if (!dpu_kms) { - DPU_ERROR("invalid dpu_kms\n"); - return; - } dpu_enc = to_dpu_encoder_virt(drm_enc); if (!dpu_enc || !dpu_enc->cur_master) { @@ -1184,7 +1174,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) struct dpu_encoder_virt *dpu_enc = NULL; struct msm_drm_private *priv; struct dpu_kms *dpu_kms; - struct drm_display_mode *mode; int i = 0; if (!drm_enc) { @@ -1193,9 +1182,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) } else if (!drm_enc->dev) { DPU_ERROR("invalid dev\n"); return; - } else if (!drm_enc->dev->dev_private) { - DPU_ERROR("invalid dev_private\n"); - return; } dpu_enc = to_dpu_encoder_virt(drm_enc); @@ -1204,8 +1190,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) mutex_lock(&dpu_enc->enc_lock); dpu_enc->enabled = false; - mode = &drm_enc->crtc->state->adjusted_mode; - priv = drm_enc->dev->dev_private; dpu_kms = to_dpu_kms(priv->kms); @@ -1734,8 +1718,7 @@ static void dpu_encoder_vsync_event_handler(struct timer_list *t) struct msm_drm_private *priv; struct msm_drm_thread *event_thread; - if (!drm_enc->dev || !drm_enc->dev->dev_private || - !drm_enc->crtc) { + if (!drm_enc->dev || !drm_enc->crtc) { DPU_ERROR("invalid parameters\n"); return; } @@ -1914,8 +1897,6 @@ static int _dpu_encoder_debugfs_status_open(struct inode *inode, static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc) { struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc); - struct msm_drm_private *priv; - struct dpu_kms *dpu_kms; int i; static const struct file_operations debugfs_status_fops = { @@ -1927,14 +1908,11 @@ static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc) char name[DPU_NAME_SIZE]; - if (!drm_enc->dev || !drm_enc->dev->dev_private) { + if (!drm_enc->dev) { DPU_ERROR("invalid encoder or kms\n"); return -EINVAL; } - priv = drm_enc->dev->dev_private; - dpu_kms = to_dpu_kms(priv->kms); - snprintf(name, DPU_NAME_SIZE, "encoder%u", drm_enc->base.id); /* create overall sub-directory for the encoder */ @@ -2042,9 +2020,8 @@ static int dpu_encoder_setup_display(struct dpu_encoder_virt *dpu_enc, enum dpu_intf_type intf_type; struct dpu_enc_phys_init_params phys_params; - if (!dpu_enc || !dpu_kms) { - DPU_ERROR("invalid arg(s), enc %d kms %d\n", - dpu_enc != 0, dpu_kms != 0); + if (!dpu_enc) { + DPU_ERROR("invalid arg(s), enc %d\n", dpu_enc != 0); return -EINVAL; } @@ -2133,14 +2110,12 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t) struct dpu_encoder_virt *dpu_enc = from_timer(dpu_enc, t, frame_done_timer); struct drm_encoder *drm_enc = &dpu_enc->base; - struct msm_drm_private *priv; u32 event; - if (!drm_enc->dev || !drm_enc->dev->dev_private) { + if (!drm_enc->dev) { DPU_ERROR("invalid parameters\n"); return; } - priv = drm_enc->dev->dev_private; if (!dpu_enc->frame_busy_mask[0] || !dpu_enc->crtc_frame_event_cb) { DRM_DEBUG_KMS("id:%u invalid timeout frame_busy_mask=%lu\n", diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c index 2923b63d95fe..047960949fbb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c @@ -124,13 +124,11 @@ static void dpu_encoder_phys_cmd_pp_rd_ptr_irq(void *arg, int irq_idx) static void dpu_encoder_phys_cmd_ctl_start_irq(void *arg, int irq_idx) { struct dpu_encoder_phys *phys_enc = arg; - struct dpu_encoder_phys_cmd *cmd_enc; if (!phys_enc || !phys_enc->hw_ctl) return; DPU_ATRACE_BEGIN("ctl_start_irq"); - cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); atomic_add_unless(&phys_enc->pending_ctlstart_cnt, -1, 0); @@ -316,13 +314,9 @@ end: static void dpu_encoder_phys_cmd_irq_control(struct dpu_encoder_phys *phys_enc, bool enable) { - struct dpu_encoder_phys_cmd *cmd_enc; - if (!phys_enc) return; - cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); - trace_dpu_enc_phys_cmd_irq_ctrl(DRMID(phys_enc->parent), phys_enc->hw_pp->idx - PINGPONG_0, enable, atomic_read(&phys_enc->vblank_refcount)); @@ -355,7 +349,6 @@ static void dpu_encoder_phys_cmd_tearcheck_config( struct drm_display_mode *mode; bool tc_enable = true; u32 vsync_hz; - struct msm_drm_private *priv; struct dpu_kms *dpu_kms; if (!phys_enc || !phys_enc->hw_pp) { @@ -373,11 +366,6 @@ static void dpu_encoder_phys_cmd_tearcheck_config( } dpu_kms = phys_enc->dpu_kms; - if (!dpu_kms || !dpu_kms->dev || !dpu_kms->dev->dev_private) { - DPU_ERROR("invalid device\n"); - return; - } - priv = dpu_kms->dev->dev_private; /* * TE default: dsi byte clock calculated base on 70 fps; @@ -650,13 +638,10 @@ static int dpu_encoder_phys_cmd_wait_for_tx_complete( struct dpu_encoder_phys *phys_enc) { int rc; - struct dpu_encoder_phys_cmd *cmd_enc; if (!phys_enc) return -EINVAL; - cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); - rc = _dpu_encoder_phys_cmd_wait_for_idle(phys_enc); if (rc) { DRM_ERROR("failed wait_for_idle: id:%u ret:%d intf:%d\n", diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index b9c84fb4d4a1..3123ef873cdf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -374,7 +374,7 @@ static void dpu_encoder_phys_vid_mode_set( struct drm_display_mode *mode, struct drm_display_mode *adj_mode) { - if (!phys_enc || !phys_enc->dpu_kms) { + if (!phys_enc) { DPU_ERROR("invalid encoder/kms\n"); return; } @@ -566,16 +566,13 @@ static void dpu_encoder_phys_vid_prepare_for_kickoff( static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc) { - struct msm_drm_private *priv; unsigned long lock_flags; int ret; - if (!phys_enc || !phys_enc->parent || !phys_enc->parent->dev || - !phys_enc->parent->dev->dev_private) { + if (!phys_enc || !phys_enc->parent || !phys_enc->parent->dev) { DPU_ERROR("invalid encoder/device\n"); return; } - priv = phys_enc->parent->dev->dev_private; if (!phys_enc->hw_intf || !phys_enc->hw_ctl) { DPU_ERROR("invalid hw_intf %d hw_ctl %d\n", diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 58b0485dc375..6c92f0fbeac9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -30,10 +30,6 @@ #define CREATE_TRACE_POINTS #include "dpu_trace.h" -static const char * const iommu_ports[] = { - "mdp_0", -}; - /* * To enable overall DRM driver logging * # echo 0x2 > /sys/module/drm/parameters/debug @@ -68,16 +64,14 @@ static int _dpu_danger_signal_status(struct seq_file *s, bool danger_status) { struct dpu_kms *kms = (struct dpu_kms *)s->private; - struct msm_drm_private *priv; struct dpu_danger_safe_status status; int i; - if (!kms->dev || !kms->dev->dev_private || !kms->hw_mdp) { + if (!kms->hw_mdp) { DPU_ERROR("invalid arg(s)\n"); return 0; } - priv = kms->dev->dev_private; memset(&status, 0, sizeof(struct dpu_danger_safe_status)); pm_runtime_get_sync(&kms->pdev->dev); @@ -153,13 +147,7 @@ static int _dpu_debugfs_show_regset32(struct seq_file *s, void *data) return 0; dev = dpu_kms->dev; - if (!dev) - return 0; - priv = dev->dev_private; - if (!priv) - return 0; - base = dpu_kms->mmio + regset->offset; /* insert padding spaces, if needed */ @@ -280,7 +268,6 @@ static void dpu_kms_prepare_commit(struct msm_kms *kms, struct drm_atomic_state *state) { struct dpu_kms *dpu_kms; - struct msm_drm_private *priv; struct drm_device *dev; struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; @@ -292,10 +279,6 @@ static void dpu_kms_prepare_commit(struct msm_kms *kms, dpu_kms = to_dpu_kms(kms); dev = dpu_kms->dev; - if (!dev || !dev->dev_private) - return; - priv = dev->dev_private; - /* Call prepare_commit for all affected encoders */ for_each_new_crtc_in_state(state, crtc, crtc_state, i) { drm_for_each_encoder_mask(encoder, crtc->dev, @@ -333,7 +316,6 @@ void dpu_kms_encoder_enable(struct drm_encoder *encoder) if (funcs && funcs->commit) funcs->commit(encoder); - WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); drm_for_each_crtc(crtc, dev) { if (!(crtc->state->encoder_mask & drm_encoder_mask(encoder))) continue; @@ -464,16 +446,6 @@ static void _dpu_kms_drm_obj_destroy(struct dpu_kms *dpu_kms) struct msm_drm_private *priv; int i; - if (!dpu_kms) { - DPU_ERROR("invalid dpu_kms\n"); - return; - } else if (!dpu_kms->dev) { - DPU_ERROR("invalid dev\n"); - return; - } else if (!dpu_kms->dev->dev_private) { - DPU_ERROR("invalid dev_private\n"); - return; - } priv = dpu_kms->dev->dev_private; for (i = 0; i < priv->num_crtcs; i++) @@ -505,7 +477,6 @@ static int _dpu_kms_drm_obj_init(struct dpu_kms *dpu_kms) int primary_planes_idx = 0, cursor_planes_idx = 0, i, ret; int max_crtc_count; - dev = dpu_kms->dev; priv = dev->dev_private; catalog = dpu_kms->catalog; @@ -585,8 +556,6 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms) int i; dev = dpu_kms->dev; - if (!dev) - return; if (dpu_kms->hw_intr) dpu_hw_intr_destroy(dpu_kms->hw_intr); @@ -725,8 +694,7 @@ static void _dpu_kms_mmu_destroy(struct dpu_kms *dpu_kms) mmu = dpu_kms->base.aspace->mmu; - mmu->funcs->detach(mmu, (const char **)iommu_ports, - ARRAY_SIZE(iommu_ports)); + mmu->funcs->detach(mmu); msm_gem_address_space_put(dpu_kms->base.aspace); dpu_kms->base.aspace = NULL; @@ -752,8 +720,7 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms) return PTR_ERR(aspace); } - ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports, - ARRAY_SIZE(iommu_ports)); + ret = aspace->mmu->funcs->attach(aspace->mmu); if (ret) { DPU_ERROR("failed to attach iommu %d\n", ret); msm_gem_address_space_put(aspace); @@ -803,16 +770,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) dpu_kms = to_dpu_kms(kms); dev = dpu_kms->dev; - if (!dev) { - DPU_ERROR("invalid device\n"); - return rc; - } - priv = dev->dev_private; - if (!priv) { - DPU_ERROR("invalid private data\n"); - return rc; - } atomic_set(&dpu_kms->bandwidth_ref, 0); @@ -974,7 +932,7 @@ struct msm_kms *dpu_kms_init(struct drm_device *dev) struct dpu_kms *dpu_kms; int irq; - if (!dev || !dev->dev_private) { + if (!dev) { DPU_ERROR("drm device node invalid\n"); return ERR_PTR(-EINVAL); } @@ -1064,11 +1022,6 @@ static int __maybe_unused dpu_runtime_suspend(struct device *dev) struct dss_module_power *mp = &dpu_kms->mp; ddev = dpu_kms->dev; - if (!ddev) { - DPU_ERROR("invalid drm_device\n"); - return rc; - } - rc = msm_dss_enable_clk(mp->clk_config, mp->num_clk, false); if (rc) DPU_ERROR("clock disable failed rc:%d\n", rc); @@ -1086,11 +1039,6 @@ static int __maybe_unused dpu_runtime_resume(struct device *dev) struct dss_module_power *mp = &dpu_kms->mp; ddev = dpu_kms->dev; - if (!ddev) { - DPU_ERROR("invalid drm_device\n"); - return rc; - } - rc = msm_dss_enable_clk(mp->clk_config, mp->num_clk, true); if (rc) { DPU_ERROR("clock enable failed rc:%d\n", rc); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index 959d03e007fa..c6169e7df19d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -139,10 +139,6 @@ struct vsync_info { #define to_dpu_kms(x) container_of(x, struct dpu_kms, base) -/* get struct msm_kms * from drm_device * */ -#define ddev_to_msm_kms(D) ((D) && (D)->dev_private ? \ - ((struct msm_drm_private *)((D)->dev_private))->kms : NULL) - /** * Debugfs functions - extra helper functions for debugfs support * diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c index 8d24b79fd400..991f4c8f8a12 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c @@ -154,10 +154,6 @@ void dpu_vbif_set_ot_limit(struct dpu_kms *dpu_kms, u32 ot_lim; int ret, i; - if (!dpu_kms) { - DPU_ERROR("invalid arguments\n"); - return; - } mdp = dpu_kms->hw_mdp; for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) { @@ -214,7 +210,7 @@ void dpu_vbif_set_qos_remap(struct dpu_kms *dpu_kms, const struct dpu_vbif_qos_tbl *qos_tbl; int i; - if (!dpu_kms || !params || !dpu_kms->hw_mdp) { + if (!params || !dpu_kms->hw_mdp) { DPU_ERROR("invalid arguments\n"); return; } diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index 50711ccc8691..dda05436f716 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -157,10 +157,6 @@ static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate, } } -static const char * const iommu_ports[] = { - "mdp_port0_cb0", "mdp_port1_cb0", -}; - static void mdp4_destroy(struct msm_kms *kms) { struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms)); @@ -172,8 +168,7 @@ static void mdp4_destroy(struct msm_kms *kms) drm_gem_object_put_unlocked(mdp4_kms->blank_cursor_bo); if (aspace) { - aspace->mmu->funcs->detach(aspace->mmu, - iommu_ports, ARRAY_SIZE(iommu_ports)); + aspace->mmu->funcs->detach(aspace->mmu); msm_gem_address_space_put(aspace); } @@ -524,8 +519,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) kms->aspace = aspace; - ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports, - ARRAY_SIZE(iommu_ports)); + ret = aspace->mmu->funcs->attach(aspace->mmu); if (ret) goto fail; } else { diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c index f6e71ff539ca..1f48f64539a2 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c @@ -14,7 +14,7 @@ struct mdp5_cfg_handler { /* mdp5_cfg must be exposed (used in mdp5.xml.h) */ const struct mdp5_cfg_hw *mdp5_cfg = NULL; -const struct mdp5_cfg_hw msm8x74v1_config = { +static const struct mdp5_cfg_hw msm8x74v1_config = { .name = "msm8x74v1", .mdp = { .count = 1, @@ -98,7 +98,7 @@ const struct mdp5_cfg_hw msm8x74v1_config = { .max_clk = 200000000, }; -const struct mdp5_cfg_hw msm8x74v2_config = { +static const struct mdp5_cfg_hw msm8x74v2_config = { .name = "msm8x74", .mdp = { .count = 1, @@ -180,7 +180,7 @@ const struct mdp5_cfg_hw msm8x74v2_config = { .max_clk = 200000000, }; -const struct mdp5_cfg_hw apq8084_config = { +static const struct mdp5_cfg_hw apq8084_config = { .name = "apq8084", .mdp = { .count = 1, @@ -275,7 +275,7 @@ const struct mdp5_cfg_hw apq8084_config = { .max_clk = 320000000, }; -const struct mdp5_cfg_hw msm8x16_config = { +static const struct mdp5_cfg_hw msm8x16_config = { .name = "msm8x16", .mdp = { .count = 1, @@ -342,7 +342,7 @@ const struct mdp5_cfg_hw msm8x16_config = { .max_clk = 320000000, }; -const struct mdp5_cfg_hw msm8x94_config = { +static const struct mdp5_cfg_hw msm8x94_config = { .name = "msm8x94", .mdp = { .count = 1, @@ -437,7 +437,7 @@ const struct mdp5_cfg_hw msm8x94_config = { .max_clk = 400000000, }; -const struct mdp5_cfg_hw msm8x96_config = { +static const struct mdp5_cfg_hw msm8x96_config = { .name = "msm8x96", .mdp = { .count = 1, @@ -545,7 +545,104 @@ const struct mdp5_cfg_hw msm8x96_config = { .max_clk = 412500000, }; -const struct mdp5_cfg_hw msm8917_config = { +const struct mdp5_cfg_hw msm8x76_config = { + .name = "msm8x76", + .mdp = { + .count = 1, + .caps = MDP_CAP_SMP | + MDP_CAP_DSC | + MDP_CAP_SRC_SPLIT | + 0, + }, + .ctl = { + .count = 3, + .base = { 0x01000, 0x01200, 0x01400 }, + .flush_hw_mask = 0xffffffff, + }, + .smp = { + .mmb_count = 10, + .mmb_size = 10240, + .clients = { + [SSPP_VIG0] = 1, [SSPP_VIG1] = 9, + [SSPP_DMA0] = 4, + [SSPP_RGB0] = 7, [SSPP_RGB1] = 8, + }, + }, + .pipe_vig = { + .count = 2, + .base = { 0x04000, 0x06000 }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_SCALE | + MDP_PIPE_CAP_CSC | + MDP_PIPE_CAP_DECIMATION | + MDP_PIPE_CAP_SW_PIX_EXT | + 0, + }, + .pipe_rgb = { + .count = 2, + .base = { 0x14000, 0x16000 }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_DECIMATION | + MDP_PIPE_CAP_SW_PIX_EXT | + 0, + }, + .pipe_dma = { + .count = 1, + .base = { 0x24000 }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_SW_PIX_EXT | + 0, + }, + .pipe_cursor = { + .count = 1, + .base = { 0x440DC }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_SW_PIX_EXT | + MDP_PIPE_CAP_CURSOR | + 0, + }, + .lm = { + .count = 2, + .base = { 0x44000, 0x45000 }, + .instances = { + { .id = 0, .pp = 0, .dspp = 0, + .caps = MDP_LM_CAP_DISPLAY, }, + { .id = 1, .pp = -1, .dspp = -1, + .caps = MDP_LM_CAP_WB }, + }, + .nb_stages = 8, + .max_width = 2560, + .max_height = 0xFFFF, + }, + .dspp = { + .count = 1, + .base = { 0x54000 }, + + }, + .pp = { + .count = 3, + .base = { 0x70000, 0x70800, 0x72000 }, + }, + .dsc = { + .count = 2, + .base = { 0x80000, 0x80400 }, + }, + .intf = { + .base = { 0x6a000, 0x6a800, 0x6b000 }, + .connect = { + [0] = INTF_DISABLED, + [1] = INTF_DSI, + [2] = INTF_DSI, + }, + }, + .max_clk = 360000000, +}; + +static const struct mdp5_cfg_hw msm8917_config = { .name = "msm8917", .mdp = { .count = 1, @@ -630,7 +727,7 @@ const struct mdp5_cfg_hw msm8917_config = { .max_clk = 320000000, }; -const struct mdp5_cfg_hw msm8998_config = { +static const struct mdp5_cfg_hw msm8998_config = { .name = "msm8998", .mdp = { .count = 1, @@ -745,6 +842,7 @@ static const struct mdp5_cfg_handler cfg_handlers_v1[] = { { .revision = 6, .config = { .hw = &msm8x16_config } }, { .revision = 9, .config = { .hw = &msm8x94_config } }, { .revision = 7, .config = { .hw = &msm8x96_config } }, + { .revision = 11, .config = { .hw = &msm8x76_config } }, { .revision = 15, .config = { .hw = &msm8917_config } }, }; diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index eb0b4b7dc7cc..05cc04f729d6 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -214,7 +214,6 @@ static void blend_setup(struct drm_crtc *crtc) struct mdp5_pipeline *pipeline = &mdp5_cstate->pipeline; struct mdp5_kms *mdp5_kms = get_kms(crtc); struct drm_plane *plane; - const struct mdp5_cfg_hw *hw_cfg; struct mdp5_plane_state *pstate, *pstates[STAGE_MAX + 1] = {NULL}; const struct mdp_format *format; struct mdp5_hw_mixer *mixer = pipeline->mixer; @@ -232,8 +231,6 @@ static void blend_setup(struct drm_crtc *crtc) u32 val; #define blender(stage) ((stage) - STAGE0) - hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg); - spin_lock_irqsave(&mdp5_crtc->lm_lock, flags); /* ctl could be released already when we are shutting down: */ diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 91cd76a2bab1..e43ecd4be10a 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -19,10 +19,6 @@ #include "msm_mmu.h" #include "mdp5_kms.h" -static const char *iommu_ports[] = { - "mdp_0", -}; - static int mdp5_hw_init(struct msm_kms *kms) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); @@ -233,8 +229,7 @@ static void mdp5_kms_destroy(struct msm_kms *kms) mdp5_pipe_destroy(mdp5_kms->hwpipes[i]); if (aspace) { - aspace->mmu->funcs->detach(aspace->mmu, - iommu_ports, ARRAY_SIZE(iommu_ports)); + aspace->mmu->funcs->detach(aspace->mmu); msm_gem_address_space_put(aspace); } } @@ -314,6 +309,10 @@ int mdp5_disable(struct mdp5_kms *mdp5_kms) mdp5_kms->enable_count--; WARN_ON(mdp5_kms->enable_count < 0); + if (mdp5_kms->tbu_rt_clk) + clk_disable_unprepare(mdp5_kms->tbu_rt_clk); + if (mdp5_kms->tbu_clk) + clk_disable_unprepare(mdp5_kms->tbu_clk); clk_disable_unprepare(mdp5_kms->ahb_clk); clk_disable_unprepare(mdp5_kms->axi_clk); clk_disable_unprepare(mdp5_kms->core_clk); @@ -334,6 +333,10 @@ int mdp5_enable(struct mdp5_kms *mdp5_kms) clk_prepare_enable(mdp5_kms->core_clk); if (mdp5_kms->lut_clk) clk_prepare_enable(mdp5_kms->lut_clk); + if (mdp5_kms->tbu_clk) + clk_prepare_enable(mdp5_kms->tbu_clk); + if (mdp5_kms->tbu_rt_clk) + clk_prepare_enable(mdp5_kms->tbu_rt_clk); return 0; } @@ -466,14 +469,11 @@ static int modeset_init(struct mdp5_kms *mdp5_kms) { struct drm_device *dev = mdp5_kms->dev; struct msm_drm_private *priv = dev->dev_private; - const struct mdp5_cfg_hw *hw_cfg; unsigned int num_crtcs; int i, ret, pi = 0, ci = 0; struct drm_plane *primary[MAX_BASES] = { NULL }; struct drm_plane *cursor[MAX_BASES] = { NULL }; - hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg); - /* * Construct encoders and modeset initialize connector devices * for each external display interface. @@ -737,8 +737,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) kms->aspace = aspace; - ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports, - ARRAY_SIZE(iommu_ports)); + ret = aspace->mmu->funcs->attach(aspace->mmu); if (ret) { DRM_DEV_ERROR(&pdev->dev, "failed to attach iommu: %d\n", ret); @@ -974,6 +973,8 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev) /* optional clocks: */ get_clk(pdev, &mdp5_kms->lut_clk, "lut", false); + get_clk(pdev, &mdp5_kms->tbu_clk, "tbu", false); + get_clk(pdev, &mdp5_kms->tbu_rt_clk, "tbu_rt", false); /* we need to set a default rate before enabling. Set a safe * rate first, then figure out hw revision, and then set a diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h index d1bf4fdfc815..128866742593 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h @@ -53,6 +53,8 @@ struct mdp5_kms { struct clk *ahb_clk; struct clk *core_clk; struct clk *lut_clk; + struct clk *tbu_clk; + struct clk *tbu_rt_clk; struct clk *vsync_clk; /* diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c index b31cfb554fa2..d7fa2c49e741 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c @@ -121,7 +121,6 @@ uint32_t mdp5_smp_calculate(struct mdp5_smp *smp, struct mdp5_kms *mdp5_kms = get_kms(smp); int rev = mdp5_cfg_get_hw_rev(mdp5_kms->cfg); int i, hsub, nplanes, nlines; - u32 fmt = format->base.pixel_format; uint32_t blkcfg = 0; nplanes = info->num_planes; @@ -135,7 +134,6 @@ uint32_t mdp5_smp_calculate(struct mdp5_smp *smp, * them together, writes to SMP using a single client. */ if ((rev > 0) && (format->chroma_sample > CHROMA_FULL)) { - fmt = DRM_FORMAT_NV24; nplanes = 2; /* if decimation is enabled, HW decimates less on the diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index b7b7c1a9164a..86ad3fdf207d 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -66,6 +66,26 @@ static const struct msm_dsi_config msm8916_dsi_cfg = { .num_dsi = 1, }; +static const char * const dsi_8976_bus_clk_names[] = { + "mdp_core", "iface", "bus", +}; + +static const struct msm_dsi_config msm8976_dsi_cfg = { + .io_offset = DSI_6G_REG_SHIFT, + .reg_cfg = { + .num = 3, + .regs = { + {"gdsc", -1, -1}, + {"vdda", 100000, 100}, /* 1.2 V */ + {"vddio", 100000, 100}, /* 1.8 V */ + }, + }, + .bus_clk_names = dsi_8976_bus_clk_names, + .num_bus_clks = ARRAY_SIZE(dsi_8976_bus_clk_names), + .io_start = { 0x1a94000, 0x1a96000 }, + .num_dsi = 2, +}; + static const struct msm_dsi_config msm8994_dsi_cfg = { .io_offset = DSI_6G_REG_SHIFT, .reg_cfg = { @@ -147,7 +167,7 @@ static const struct msm_dsi_config sdm845_dsi_cfg = { .num_dsi = 2, }; -const static struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = { +static const struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = { .link_clk_enable = dsi_link_clk_enable_v2, .link_clk_disable = dsi_link_clk_disable_v2, .clk_init_ver = dsi_clk_init_v2, @@ -158,7 +178,7 @@ const static struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = { .calc_clk_rate = dsi_calc_clk_rate_v2, }; -const static struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = { +static const struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = { .link_clk_enable = dsi_link_clk_enable_6g, .link_clk_disable = dsi_link_clk_disable_6g, .clk_init_ver = NULL, @@ -169,7 +189,7 @@ const static struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = { .calc_clk_rate = dsi_calc_clk_rate_6g, }; -const static struct msm_dsi_host_cfg_ops msm_dsi_6g_v2_host_ops = { +static const struct msm_dsi_host_cfg_ops msm_dsi_6g_v2_host_ops = { .link_clk_enable = dsi_link_clk_enable_6g, .link_clk_disable = dsi_link_clk_disable_6g, .clk_init_ver = dsi_clk_init_6g_v2, @@ -197,6 +217,8 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = { &msm8916_dsi_cfg, &msm_dsi_6g_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_4_1, &msm8996_dsi_cfg, &msm_dsi_6g_host_ops}, + {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_4_2, + &msm8976_dsi_cfg, &msm_dsi_6g_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_2_0, &msm8998_dsi_cfg, &msm_dsi_6g_v2_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_2_1, diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h index e2b7a7dfbe49..50a37ceb6a25 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h @@ -17,6 +17,7 @@ #define MSM_DSI_6G_VER_MINOR_V1_3 0x10030000 #define MSM_DSI_6G_VER_MINOR_V1_3_1 0x10030001 #define MSM_DSI_6G_VER_MINOR_V1_4_1 0x10040001 +#define MSM_DSI_6G_VER_MINOR_V1_4_2 0x10040002 #define MSM_DSI_6G_VER_MINOR_V2_2_0 0x20000000 #define MSM_DSI_6G_VER_MINOR_V2_2_1 0x20020001 diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 1e7b1be25bb0..458cec82ae13 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1293,14 +1293,13 @@ static int dsi_cmd_dma_tx(struct msm_dsi_host *msm_host, int len) static int dsi_cmd_dma_rx(struct msm_dsi_host *msm_host, u8 *buf, int rx_byte, int pkt_size) { - u32 *lp, *temp, data; + u32 *temp, data; int i, j = 0, cnt; u32 read_cnt; u8 reg[16]; int repeated_bytes = 0; int buf_offset = buf - msm_host->rx_buf; - lp = (u32 *)buf; temp = (u32 *)reg; cnt = (rx_byte + 3) >> 2; if (cnt > 4) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 3522863a4984..b0cfa67d2a57 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -145,7 +145,7 @@ int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing, { const unsigned long bit_rate = clk_req->bitclk_rate; const unsigned long esc_rate = clk_req->escclk_rate; - s32 ui, ui_x8, lpx; + s32 ui, ui_x8; s32 tmax, tmin; s32 pcnt0 = 50; s32 pcnt1 = 50; @@ -175,7 +175,6 @@ int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing, ui = mult_frac(NSEC_PER_MSEC, coeff, bit_rate / 1000); ui_x8 = ui << 3; - lpx = mult_frac(NSEC_PER_MSEC, coeff, esc_rate / 1000); temp = S_DIV_ROUND_UP(38 * coeff - val_ckln * ui, ui_x8); tmin = max_t(s32, temp, 0); @@ -262,7 +261,7 @@ int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, { const unsigned long bit_rate = clk_req->bitclk_rate; const unsigned long esc_rate = clk_req->escclk_rate; - s32 ui, ui_x8, lpx; + s32 ui, ui_x8; s32 tmax, tmin; s32 pcnt0 = 50; s32 pcnt1 = 50; @@ -284,7 +283,6 @@ int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, ui = mult_frac(NSEC_PER_MSEC, coeff, bit_rate / 1000); ui_x8 = ui << 3; - lpx = mult_frac(NSEC_PER_MSEC, coeff, esc_rate / 1000); temp = S_DIV_ROUND_UP(38 * coeff, ui_x8); tmin = max_t(s32, temp, 0); @@ -485,6 +483,8 @@ static const struct of_device_id dsi_phy_dt_match[] = { #ifdef CONFIG_DRM_MSM_DSI_28NM_PHY { .compatible = "qcom,dsi-phy-28nm-hpm", .data = &dsi_phy_28nm_hpm_cfgs }, + { .compatible = "qcom,dsi-phy-28nm-hpm-fam-b", + .data = &dsi_phy_28nm_hpm_famb_cfgs }, { .compatible = "qcom,dsi-phy-28nm-lp", .data = &dsi_phy_28nm_lp_cfgs }, #endif diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index c4069ce6afe6..24b294ed3059 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -40,6 +40,7 @@ struct msm_dsi_phy_cfg { }; extern const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs; +extern const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_famb_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c index b3f678f6c2aa..c3c580cfd8b1 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c @@ -39,15 +39,10 @@ static void dsi_28nm_dphy_set_timing(struct msm_dsi_phy *phy, DSI_28nm_PHY_TIMING_CTRL_11_TRIG3_CMD(0)); } -static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable) +static void dsi_28nm_phy_regulator_enable_dcdc(struct msm_dsi_phy *phy) { void __iomem *base = phy->reg_base; - if (!enable) { - dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 0); - return; - } - dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_0, 0x0); dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 1); dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_5, 0); @@ -56,6 +51,39 @@ static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable) dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_1, 0x9); dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_0, 0x7); dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_4, 0x20); + dsi_phy_write(phy->base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x00); +} + +static void dsi_28nm_phy_regulator_enable_ldo(struct msm_dsi_phy *phy) +{ + void __iomem *base = phy->reg_base; + + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_0, 0x0); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 0); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_5, 0x7); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_3, 0); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_2, 0x1); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_1, 0x1); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_4, 0x20); + + if (phy->cfg->type == MSM_DSI_PHY_28NM_LP) + dsi_phy_write(phy->base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x05); + else + dsi_phy_write(phy->base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x0d); +} + +static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable) +{ + if (!enable) { + dsi_phy_write(phy->reg_base + + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 0); + return; + } + + if (phy->regulator_ldo_mode) + dsi_28nm_phy_regulator_enable_ldo(phy); + else + dsi_28nm_phy_regulator_enable_dcdc(phy); } static int dsi_28nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id, @@ -77,8 +105,6 @@ static int dsi_28nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id, dsi_28nm_phy_regulator_ctrl(phy, true); - dsi_phy_write(base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x00); - dsi_28nm_dphy_set_timing(phy, timing); dsi_phy_write(base + REG_DSI_28nm_PHY_CTRL_1, 0x00); @@ -142,6 +168,24 @@ const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs = { .num_dsi_phy = 2, }; +const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_famb_cfgs = { + .type = MSM_DSI_PHY_28NM_HPM, + .src_pll_truthtable = { {true, true}, {false, true} }, + .reg_cfg = { + .num = 1, + .regs = { + {"vddio", 100000, 100}, + }, + }, + .ops = { + .enable = dsi_28nm_phy_enable, + .disable = dsi_28nm_phy_disable, + .init = msm_dsi_phy_init_common, + }, + .io_start = { 0x1a94400, 0x1a96400 }, + .num_dsi_phy = 2, +}; + const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs = { .type = MSM_DSI_PHY_28NM_LP, .src_pll_truthtable = { {true, true}, {true, true} }, diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c index 1697e61f9c2f..8a38d4b95102 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c @@ -29,8 +29,12 @@ static int msm_hdmi_phy_resource_init(struct hdmi_phy *phy) reg = devm_regulator_get(dev, cfg->reg_names[i]); if (IS_ERR(reg)) { ret = PTR_ERR(reg); - DRM_DEV_ERROR(dev, "failed to get phy regulator: %s (%d)\n", - cfg->reg_names[i], ret); + if (ret != -EPROBE_DEFER) { + DRM_DEV_ERROR(dev, + "failed to get phy regulator: %s (%d)\n", + cfg->reg_names[i], ret); + } + return ret; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index a052364a5d74..18f3a5c53ffb 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -16,6 +16,7 @@ #include <linux/pm_opp.h> #include <linux/devfreq.h> #include <linux/devcoredump.h> +#include <linux/sched/task.h> /* * Power Management: @@ -838,7 +839,7 @@ msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev, return ERR_CAST(aspace); } - ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0); + ret = aspace->mmu->funcs->attach(aspace->mmu); if (ret) { msm_gem_address_space_put(aspace); return ERR_PTR(ret); @@ -995,8 +996,7 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false); if (!IS_ERR_OR_NULL(gpu->aspace)) { - gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu, - NULL, 0); + gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu); msm_gem_address_space_put(gpu->aspace); } } diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c index 34f643a0c28a..34980d8eb7ad 100644 --- a/drivers/gpu/drm/msm/msm_gpummu.c +++ b/drivers/gpu/drm/msm/msm_gpummu.c @@ -21,14 +21,12 @@ struct msm_gpummu { #define GPUMMU_PAGE_SIZE SZ_4K #define TABLE_SIZE (sizeof(uint32_t) * GPUMMU_VA_RANGE / GPUMMU_PAGE_SIZE) -static int msm_gpummu_attach(struct msm_mmu *mmu, const char * const *names, - int cnt) +static int msm_gpummu_attach(struct msm_mmu *mmu) { return 0; } -static void msm_gpummu_detach(struct msm_mmu *mmu, const char * const *names, - int cnt) +static void msm_gpummu_detach(struct msm_mmu *mmu) { } diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 8c95c31e2b12..ad58cfe5998e 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -23,16 +23,14 @@ static int msm_fault_handler(struct iommu_domain *domain, struct device *dev, return 0; } -static int msm_iommu_attach(struct msm_mmu *mmu, const char * const *names, - int cnt) +static int msm_iommu_attach(struct msm_mmu *mmu) { struct msm_iommu *iommu = to_msm_iommu(mmu); return iommu_attach_device(iommu->domain, mmu->dev); } -static void msm_iommu_detach(struct msm_mmu *mmu, const char * const *names, - int cnt) +static void msm_iommu_detach(struct msm_mmu *mmu) { struct msm_iommu *iommu = to_msm_iommu(mmu); diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index 871d56303697..67a623f14319 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -10,8 +10,8 @@ #include <linux/iommu.h> struct msm_mmu_funcs { - int (*attach)(struct msm_mmu *mmu, const char * const *names, int cnt); - void (*detach)(struct msm_mmu *mmu, const char * const *names, int cnt); + int (*attach)(struct msm_mmu *mmu); + void (*detach)(struct msm_mmu *mmu); int (*map)(struct msm_mmu *mmu, uint64_t iova, struct sg_table *sgt, unsigned len, int prot); int (*unmap)(struct msm_mmu *mmu, uint64_t iova, unsigned len); diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index c7832a951039..af7ceb246c7c 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -298,7 +298,7 @@ void msm_rd_debugfs_cleanup(struct msm_drm_private *priv) static void snapshot_buf(struct msm_rd_state *rd, struct msm_gem_submit *submit, int idx, - uint64_t iova, uint32_t size) + uint64_t iova, uint32_t size, bool full) { struct msm_gem_object *obj = submit->bos[idx].obj; unsigned offset = 0; @@ -318,6 +318,9 @@ static void snapshot_buf(struct msm_rd_state *rd, rd_write_section(rd, RD_GPUADDR, (uint32_t[3]){ iova, size, iova >> 32 }, 12); + if (!full) + return; + /* But only dump the contents of buffers marked READ */ if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ)) return; @@ -381,18 +384,21 @@ void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); for (i = 0; i < submit->nr_bos; i++) - if (should_dump(submit, i)) - snapshot_buf(rd, submit, i, 0, 0); + snapshot_buf(rd, submit, i, 0, 0, should_dump(submit, i)); for (i = 0; i < submit->nr_cmds; i++) { - uint64_t iova = submit->cmd[i].iova; uint32_t szd = submit->cmd[i].size; /* in dwords */ /* snapshot cmdstream bo's (if we haven't already): */ if (!should_dump(submit, i)) { snapshot_buf(rd, submit, submit->cmd[i].idx, - submit->cmd[i].iova, szd * 4); + submit->cmd[i].iova, szd * 4, true); } + } + + for (i = 0; i < submit->nr_cmds; i++) { + uint64_t iova = submit->cmd[i].iova; + uint32_t szd = submit->cmd[i].size; /* in dwords */ switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 9d086133a84e..9458dc6c750c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -556,11 +556,11 @@ static int panfrost_probe(struct platform_device *pdev) return 0; err_out2: + pm_runtime_disable(pfdev->dev); panfrost_devfreq_fini(pfdev); err_out1: panfrost_device_fini(pfdev); err_out0: - pm_runtime_disable(pfdev->dev); drm_dev_put(ddev); return err; } diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index bdd990568476..a3ed64a1f15e 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -224,9 +224,9 @@ static size_t get_pgsize(u64 addr, size_t size) return SZ_2M; } -void panfrost_mmu_flush_range(struct panfrost_device *pfdev, - struct panfrost_mmu *mmu, - u64 iova, size_t size) +static void panfrost_mmu_flush_range(struct panfrost_device *pfdev, + struct panfrost_mmu *mmu, + u64 iova, size_t size) { if (mmu->as < 0) return; @@ -406,11 +406,11 @@ addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr) spin_lock(&pfdev->as_lock); list_for_each_entry(mmu, &pfdev->as_lru_list, list) { if (as == mmu->as) - break; + goto found_mmu; } - if (as != mmu->as) - goto out; + goto out; +found_mmu: priv = container_of(mmu, struct panfrost_file_priv, mmu); spin_lock(&priv->mm_lock); @@ -432,7 +432,8 @@ out: #define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE) -int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr) +static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, + u64 addr) { int ret, i; struct panfrost_gem_object *bo; diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c index 83c57d325ca8..2dba192bf198 100644 --- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c +++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c @@ -16,6 +16,7 @@ #include "panfrost_issues.h" #include "panfrost_job.h" #include "panfrost_mmu.h" +#include "panfrost_perfcnt.h" #include "panfrost_regs.h" #define COUNTERS_PER_BLOCK 64 diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index daff9a2af3be..acabeaf28732 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -6965,8 +6965,8 @@ static int cik_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* XXX this should actually be a bus address, not an MC address. same on older asics */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 7089dfc8c2a9..110fb38004b1 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1826,8 +1826,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, track->textures[i].use_pitch = 1; } else { track->textures[i].use_pitch = 0; - track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); - track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); + track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT); + track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT); } if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) track->textures[i].tex_coord_type = 2; diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index 840401413c58..f5f2ffea5ab2 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -476,8 +476,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, track->textures[i].use_pitch = 1; } else { track->textures[i].use_pitch = 0; - track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); - track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); + track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT); + track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT); } if (idx_value & R200_TXFORMAT_LOOKUP_DISABLE) track->textures[i].lookup_disable = true; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e937cc01910d..033bc466a862 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3696,8 +3696,8 @@ int r600_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* set dummy read address to ring address */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 41f08321a361..fd74e2611185 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -379,10 +379,6 @@ radeon_pci_remove(struct pci_dev *pdev) static void radeon_pci_shutdown(struct pci_dev *pdev) { -#ifdef CONFIG_PPC64 - struct drm_device *ddev = pci_get_drvdata(pdev); -#endif - /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown */ @@ -390,13 +386,14 @@ radeon_pci_shutdown(struct pci_dev *pdev) radeon_pci_remove(pdev); #ifdef CONFIG_PPC64 - /* Some adapters need to be suspended before a + /* + * Some adapters need to be suspended before a * shutdown occurs in order to prevent an error * during kexec. * Make this power specific becauase it breaks * some non-power boards. */ - radeon_suspend_kms(ddev, true, true, false); + radeon_suspend_kms(pci_get_drvdata(pdev), true, true, false); #endif } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 05894d198a79..1d8efb0eefdb 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5997,8 +5997,8 @@ static int si_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* set dummy read address to ring address */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 460fd98e40a7..a0b382a637a6 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -1958,6 +1958,7 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev) case 0x682C: si_pi->cac_weights = cac_weights_cape_verde_pro; si_pi->dte_data = dte_data_sun_xt; + update_dte_from_pl2 = true; break; case 0x6825: case 0x6827: diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 1a5153197fe9..461a7a8129f4 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -23,6 +23,7 @@ #include <linux/kthread.h> #include <linux/slab.h> +#include <linux/completion.h> #include <drm/drm_print.h> #include <drm/gpu_scheduler.h> @@ -68,6 +69,8 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, if (!entity->rq_list) return -ENOMEM; + init_completion(&entity->entity_idle); + for (i = 0; i < num_rq_list; ++i) entity->rq_list[i] = rq_list[i]; @@ -286,11 +289,12 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity) */ if (spsc_queue_count(&entity->job_queue)) { if (sched) { - /* Park the kernel for a moment to make sure it isn't processing - * our enity. + /* + * Wait for thread to idle to make sure it isn't processing + * this entity. */ - kthread_park(sched->thread); - kthread_unpark(sched->thread); + wait_for_completion(&entity->entity_idle); + } if (entity->dependency) { dma_fence_remove_callback(entity->dependency, diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 2af64459b3d7..3c57e84222ca 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -47,6 +47,7 @@ #include <linux/kthread.h> #include <linux/wait.h> #include <linux/sched.h> +#include <linux/completion.h> #include <uapi/linux/sched/types.h> #include <drm/drm_print.h> @@ -134,6 +135,7 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) list_for_each_entry_continue(entity, &rq->entities, list) { if (drm_sched_entity_is_ready(entity)) { rq->current_entity = entity; + reinit_completion(&entity->entity_idle); spin_unlock(&rq->lock); return entity; } @@ -144,6 +146,7 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) if (drm_sched_entity_is_ready(entity)) { rq->current_entity = entity; + reinit_completion(&entity->entity_idle); spin_unlock(&rq->lock); return entity; } @@ -496,8 +499,10 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) fence = sched->ops->run_job(s_job); if (IS_ERR_OR_NULL(fence)) { + if (IS_ERR(fence)) + dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); + s_job->s_fence->parent = NULL; - dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); } else { s_job->s_fence->parent = fence; } @@ -645,9 +650,13 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) struct drm_sched_job *job; unsigned long flags; - /* Don't destroy jobs while the timeout worker is running */ - if (sched->timeout != MAX_SCHEDULE_TIMEOUT && - !cancel_delayed_work(&sched->work_tdr)) + /* + * Don't destroy jobs while the timeout worker is running OR thread + * is being parked and hence assumed to not touch ring_mirror_list + */ + if ((sched->timeout != MAX_SCHEDULE_TIMEOUT && + !cancel_delayed_work(&sched->work_tdr)) || + __kthread_should_park(sched->thread)) return NULL; spin_lock_irqsave(&sched->job_list_lock, flags); @@ -724,6 +733,9 @@ static int drm_sched_main(void *param) continue; sched_job = drm_sched_entity_pop_job(entity); + + complete(&entity->entity_idle); + if (!sched_job) continue; @@ -746,8 +758,9 @@ static int drm_sched_main(void *param) r); dma_fence_put(fence); } else { + if (IS_ERR(fence)) + dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); - dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); drm_sched_process_job(NULL, &sched_job->cb); } diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 5b1f9ff97576..714af052fbef 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -837,16 +837,15 @@ static int tegra_cursor_atomic_check(struct drm_plane *plane, static void tegra_cursor_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) { - struct tegra_bo *bo = tegra_fb_get_plane(plane->state->fb, 0); + struct tegra_plane_state *state = to_tegra_plane_state(plane->state); struct tegra_dc *dc = to_tegra_dc(plane->state->crtc); - struct drm_plane_state *state = plane->state; u32 value = CURSOR_CLIP_DISPLAY; /* rien ne va plus */ if (!plane->state->crtc || !plane->state->fb) return; - switch (state->crtc_w) { + switch (plane->state->crtc_w) { case 32: value |= CURSOR_SIZE_32x32; break; @@ -864,16 +863,16 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, break; default: - WARN(1, "cursor size %ux%u not supported\n", state->crtc_w, - state->crtc_h); + WARN(1, "cursor size %ux%u not supported\n", + plane->state->crtc_w, plane->state->crtc_h); return; } - value |= (bo->iova >> 10) & 0x3fffff; + value |= (state->iova[0] >> 10) & 0x3fffff; tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - value = (bo->iova >> 32) & 0x3; + value = (state->iova[0] >> 32) & 0x3; tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR_HI); #endif @@ -892,7 +891,8 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, tegra_dc_writel(dc, value, DC_DISP_BLEND_CURSOR_CONTROL); /* position the cursor */ - value = (state->crtc_y & 0x3fff) << 16 | (state->crtc_x & 0x3fff); + value = (plane->state->crtc_y & 0x3fff) << 16 | + (plane->state->crtc_x & 0x3fff); tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION); } @@ -2017,7 +2017,7 @@ static int tegra_dc_init(struct host1x_client *client) dev_warn(dc->dev, "failed to allocate syncpoint\n"); err = host1x_client_iommu_attach(client); - if (err < 0) { + if (err < 0 && err != -ENODEV) { dev_err(client->dev, "failed to attach to domain: %d\n", err); return err; } diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 56e5e7a5c108..f455ce71e85d 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -920,10 +920,8 @@ int host1x_client_iommu_attach(struct host1x_client *client) if (tegra->domain) { group = iommu_group_get(client->dev); - if (!group) { - dev_err(client->dev, "failed to get IOMMU group\n"); + if (!group) return -ENODEV; - } if (domain != tegra->domain) { err = iommu_attach_group(tegra->domain, group); @@ -1243,6 +1241,9 @@ static int host1x_drm_remove(struct host1x_device *dev) drm_atomic_helper_shutdown(drm); drm_mode_config_cleanup(drm); + if (tegra->hub) + tegra_display_hub_cleanup(tegra->hub); + err = host1x_device_exit(dev); if (err < 0) dev_err(&dev->dev, "host1x device cleanup failed: %d\n", err); diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 746dae32c484..bc15b430156d 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -27,6 +27,29 @@ static void tegra_bo_put(struct host1x_bo *bo) drm_gem_object_put_unlocked(&obj->gem); } +/* XXX move this into lib/scatterlist.c? */ +static int sg_alloc_table_from_sg(struct sg_table *sgt, struct scatterlist *sg, + unsigned int nents, gfp_t gfp_mask) +{ + struct scatterlist *dst; + unsigned int i; + int err; + + err = sg_alloc_table(sgt, nents, gfp_mask); + if (err < 0) + return err; + + dst = sgt->sgl; + + for (i = 0; i < nents; i++) { + sg_set_page(dst, sg_page(sg), sg->length, 0); + dst = sg_next(dst); + sg = sg_next(sg); + } + + return 0; +} + static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, dma_addr_t *phys) { @@ -52,11 +75,31 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, return ERR_PTR(-ENOMEM); if (obj->pages) { + /* + * If the buffer object was allocated from the explicit IOMMU + * API code paths, construct an SG table from the pages. + */ err = sg_alloc_table_from_pages(sgt, obj->pages, obj->num_pages, 0, obj->gem.size, GFP_KERNEL); if (err < 0) goto free; + } else if (obj->sgt) { + /* + * If the buffer object already has an SG table but no pages + * were allocated for it, it means the buffer was imported and + * the SG table needs to be copied to avoid overwriting any + * other potential users of the original SG table. + */ + err = sg_alloc_table_from_sg(sgt, obj->sgt->sgl, obj->sgt->nents, + GFP_KERNEL); + if (err < 0) + goto free; } else { + /* + * If the buffer object had no pages allocated and if it was + * not imported, it had to be allocated with the DMA API, so + * the DMA API helper can be used. + */ err = dma_get_sgtable(dev, sgt, obj->vaddr, obj->iova, obj->gem.size); if (err < 0) @@ -397,13 +440,6 @@ static struct tegra_bo *tegra_bo_import(struct drm_device *drm, err = tegra_bo_iommu_map(tegra, bo); if (err < 0) goto detach; - } else { - if (bo->sgt->nents > 1) { - err = -EINVAL; - goto detach; - } - - bo->iova = sg_dma_address(bo->sgt->sgl); } bo->gem.import_attach = attach; diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index 2b4082d0bc9e..47d985ac7cd7 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -605,11 +605,8 @@ static struct tegra_display_hub_state * tegra_display_hub_get_state(struct tegra_display_hub *hub, struct drm_atomic_state *state) { - struct drm_device *drm = dev_get_drvdata(hub->client.parent); struct drm_private_state *priv; - WARN_ON(!drm_modeset_is_locked(&drm->mode_config.connection_mutex)); - priv = drm_atomic_get_private_obj_state(state, &hub->base); if (IS_ERR(priv)) return ERR_CAST(priv); diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 163b590be224..cadcdd9ea427 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -129,6 +129,17 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) goto unpin; } + /* + * The display controller needs contiguous memory, so + * fail if the buffer is discontiguous and we fail to + * map its SG table to a single contiguous chunk of + * I/O virtual memory. + */ + if (err > 1) { + err = -EINVAL; + goto unpin; + } + state->iova[i] = sg_dma_address(sgt->sgl); state->sgt[i] = sgt; } else { diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 615cb319fa8b..a68d3b36b972 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -3912,8 +3912,7 @@ static int tegra_sor_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int tegra_sor_suspend(struct device *dev) +static int tegra_sor_runtime_suspend(struct device *dev) { struct tegra_sor *sor = dev_get_drvdata(dev); int err; @@ -3935,7 +3934,7 @@ static int tegra_sor_suspend(struct device *dev) return 0; } -static int tegra_sor_resume(struct device *dev) +static int tegra_sor_runtime_resume(struct device *dev) { struct tegra_sor *sor = dev_get_drvdata(dev); int err; @@ -3967,10 +3966,39 @@ static int tegra_sor_resume(struct device *dev) return 0; } -#endif + +static int tegra_sor_suspend(struct device *dev) +{ + struct tegra_sor *sor = dev_get_drvdata(dev); + int err; + + if (sor->hdmi_supply) { + err = regulator_disable(sor->hdmi_supply); + if (err < 0) + return err; + } + + return 0; +} + +static int tegra_sor_resume(struct device *dev) +{ + struct tegra_sor *sor = dev_get_drvdata(dev); + int err; + + if (sor->hdmi_supply) { + err = regulator_enable(sor->hdmi_supply); + if (err < 0) + return err; + } + + return 0; +} static const struct dev_pm_ops tegra_sor_pm_ops = { - SET_RUNTIME_PM_OPS(tegra_sor_suspend, tegra_sor_resume, NULL) + SET_RUNTIME_PM_OPS(tegra_sor_runtime_suspend, tegra_sor_runtime_resume, + NULL) + SET_SYSTEM_SLEEP_PM_OPS(tegra_sor_suspend, tegra_sor_resume) }; struct platform_driver tegra_sor_driver = { diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index 9444ba183990..3526c2892ddb 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -167,7 +167,7 @@ static int vic_init(struct host1x_client *client) int err; err = host1x_client_iommu_attach(client); - if (err < 0) { + if (err < 0 && err != -ENODEV) { dev_err(vic->dev, "failed to attach to domain: %d\n", err); return err; } @@ -386,13 +386,14 @@ static const struct vic_config vic_t194_config = { .supports_sid = true, }; -static const struct of_device_id vic_match[] = { +static const struct of_device_id tegra_vic_of_match[] = { { .compatible = "nvidia,tegra124-vic", .data = &vic_t124_config }, { .compatible = "nvidia,tegra210-vic", .data = &vic_t210_config }, { .compatible = "nvidia,tegra186-vic", .data = &vic_t186_config }, { .compatible = "nvidia,tegra194-vic", .data = &vic_t194_config }, { }, }; +MODULE_DEVICE_TABLE(of, tegra_vic_of_match); static int vic_probe(struct platform_device *pdev) { @@ -516,7 +517,7 @@ static const struct dev_pm_ops vic_pm_ops = { struct platform_driver tegra_vic_driver = { .driver = { .name = "tegra-vic", - .of_match_table = vic_match, + .of_match_table = tegra_vic_of_match, .pm = &vic_pm_ops }, .probe = vic_probe, diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile index 01fc670ce7a2..caea2a099496 100644 --- a/drivers/gpu/drm/ttm/Makefile +++ b/drivers/gpu/drm/ttm/Makefile @@ -4,8 +4,8 @@ ttm-y := ttm_memory.o ttm_tt.o ttm_bo.o \ ttm_bo_util.o ttm_bo_vm.o ttm_module.o \ - ttm_execbuf_util.o ttm_page_alloc.o ttm_bo_manager.o \ - ttm_page_alloc_dma.o + ttm_execbuf_util.o ttm_page_alloc.o ttm_bo_manager.o ttm-$(CONFIG_AGP) += ttm_agp_backend.o +ttm-$(CONFIG_DRM_TTM_DMA_PAGE_POOL) += ttm_page_alloc_dma.o obj-$(CONFIG_DRM_TTM) += ttm.o diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index ff54e7609e8f..bf876faea592 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -33,7 +33,6 @@ * when freed). */ -#if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU) #define pr_fmt(fmt) "[TTM] " fmt #include <linux/dma-mapping.h> @@ -1238,5 +1237,3 @@ int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data) return 0; } EXPORT_SYMBOL_GPL(ttm_dma_page_alloc_debugfs); - -#endif diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 4c4b59ae2c81..549dde83408b 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -560,14 +560,17 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, if (args->bcl_start != args->bcl_end) { bin = kcalloc(1, sizeof(*bin), GFP_KERNEL); - if (!bin) + if (!bin) { + v3d_job_put(&render->base); return -ENOMEM; + } ret = v3d_job_init(v3d, file_priv, &bin->base, v3d_job_free, args->in_sync_bcl); if (ret) { kfree(bin); v3d_job_put(&render->base); + kfree(bin); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 81a95651643f..e962048f65d2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -576,8 +576,7 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) else dev_priv->map_mode = vmw_dma_map_populate; - /* No TTM coherent page pool? FIXME: Ask TTM instead! */ - if (!(IS_ENABLED(CONFIG_SWIOTLB) || IS_ENABLED(CONFIG_INTEL_IOMMU)) && + if (!IS_ENABLED(CONFIG_DRM_TTM_DMA_PAGE_POOL) && (dev_priv->map_mode == vmw_dma_alloc_coherent)) return -EINVAL; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 29d8794f0421..de0530b4dc1b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -336,7 +336,6 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res) { struct vmw_private *dev_priv = res->dev_priv; - struct vmw_surface *srf; void *cmd; if (res->func->destroy == vmw_gb_surface_destroy) { @@ -360,7 +359,6 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res) */ mutex_lock(&dev_priv->cmdbuf_mutex); - srf = vmw_res_to_srf(res); dev_priv->used_memory_size -= res->backup_size; mutex_unlock(&dev_priv->cmdbuf_mutex); } |

