summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/Kconfig7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c43
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_test.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c13
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/Makefile19
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/Makefile19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/Makefile19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/Makefile19
-rw-r--r--drivers/gpu/drm/amd/include/kgd_pp_interface.h4
-rw-r--r--drivers/gpu/drm/amd/powerplay/amd_powerplay.c26
-rw-r--r--drivers/gpu/drm/amd/powerplay/amdgpu_smu.c29
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c62
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c15
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/hwmgr.h3
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h2
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h2
-rw-r--r--drivers/gpu/drm/amd/powerplay/navi10_ppt.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/renoir_ppt.c4
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_v11_0.c7
-rw-r--r--drivers/gpu/drm/amd/powerplay/vega20_ppt.c2
-rw-r--r--drivers/gpu/drm/arc/arcpgu_drv.c16
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_kms.c3
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c4
-rw-r--r--drivers/gpu/drm/drm_atomic_helper.c15
-rw-r--r--drivers/gpu/drm/drm_dp_mst_topology.c6
-rw-r--r--drivers/gpu/drm/drm_self_refresh_helper.c18
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_dump.c4
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c6
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_mmu.c17
-rw-r--r--drivers/gpu/drm/i915/Kconfig.debug1
-rw-r--r--drivers/gpu/drm/i915/Kconfig.profile2
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic.c1
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_color.c61
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c39
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c10
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.h1
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_types.h1
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c12
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context_types.h7
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c111
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_userptr.c22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c67
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.h10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c87
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c166
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c74
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6_types.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.c13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c35
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline_types.h5
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_pm.c7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c15
-rw-r--r--drivers/gpu/drm/i915/i915_active.c58
-rw-r--r--drivers/gpu/drm/i915/i915_active_types.h1
-rw-r--r--drivers/gpu/drm/i915/i915_cmd_parser.c435
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c3
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h35
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c16
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c2
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c8
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c29
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c10
-rw-r--r--drivers/gpu/drm/i915/i915_query.c7
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h12
-rw-r--r--drivers/gpu/drm/i915/intel_pch.c6
-rw-r--r--drivers/gpu/drm/i915/intel_pch.h2
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c8
-rw-r--r--drivers/gpu/drm/i915/intel_wakeref.c21
-rw-r--r--drivers/gpu/drm/i915/intel_wakeref.h45
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_ovl.c50
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_crtc.c145
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_crtc.h2
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h22
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_plane.c24
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_plane.h4
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c24
-rw-r--r--drivers/gpu/drm/msm/msm_debugfs.c6
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c2
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.c15
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_perfcnt.c1
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.c1
-rw-r--r--drivers/gpu/drm/scheduler/sched_entity.c12
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c23
-rw-r--r--drivers/gpu/drm/ttm/Makefile4
-rw-r--r--drivers/gpu/drm/ttm/ttm_page_alloc_dma.c3
-rw-r--r--drivers/gpu/drm/v3d/v3d_gem.c5
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.c3
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_surface.c2
126 files changed, 2005 insertions, 602 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 617d9c3a86c3..1168351267fd 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -179,6 +179,13 @@ config DRM_TTM
GPU memory types. Will be enabled automatically if a device driver
uses it.
+config DRM_TTM_DMA_PAGE_POOL
+ bool
+ depends on DRM_TTM && (SWIOTLB || INTEL_IOMMU)
+ default y
+ help
+ Choose this if you need the TTM dma page pool
+
config DRM_VRAM_HELPER
tristate
depends on DRM
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0bb08e0e1611..bcc5d40a8d5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -977,6 +977,9 @@ struct amdgpu_device {
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
+
+ /* device pstate */
+ int pstate;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 649e68c4479b..d1495e1c9289 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
{
unsigned long start_jiffies;
unsigned long end_jiffies;
- struct dma_fence *fence = NULL;
+ struct dma_fence *fence;
int i, r;
start_jiffies = jiffies;
@@ -44,16 +44,14 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
if (r)
goto exit_do_move;
r = dma_fence_wait(fence, false);
+ dma_fence_put(fence);
if (r)
goto exit_do_move;
- dma_fence_put(fence);
}
end_jiffies = jiffies;
r = jiffies_to_msecs(end_jiffies - start_jiffies);
exit_do_move:
- if (fence)
- dma_fence_put(fence);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 6614d8a6f4c8..2cdaf3b2a721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -604,8 +604,11 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
continue;
}
- for (i = 0; i < num_entities; i++)
+ for (i = 0; i < num_entities; i++) {
+ mutex_lock(&ctx->adev->lock_reset);
drm_sched_entity_fini(&ctx->entities[0][i].entity);
+ mutex_unlock(&ctx->adev->lock_reset);
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 693f17e78791..8e6726e0d035 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -859,6 +859,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
struct amdgpu_device *adev = dev->dev_private;
int r = 0, i;
+ /* Avoid accidently unparking the sched thread during GPU reset */
+ mutex_lock(&adev->lock_reset);
+
/* hold on the scheduler */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -884,6 +887,8 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
kthread_unpark(ring->sched.thread);
}
+ mutex_unlock(&adev->lock_reset);
+
return 0;
}
@@ -1036,6 +1041,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
if (!fences)
return -ENOMEM;
+ /* Avoid accidently unparking the sched thread during GPU reset */
+ mutex_lock(&adev->lock_reset);
+
/* stop the scheduler */
kthread_park(ring->sched.thread);
@@ -1075,6 +1083,8 @@ failure:
/* restart the scheduler */
kthread_unpark(ring->sched.thread);
+ mutex_unlock(&adev->lock_reset);
+
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
kfree(fences);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d36d2b093539..58f6b3b92831 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2057,6 +2057,7 @@ out:
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -2082,8 +2083,39 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
- /* set to low pstate by default */
- amdgpu_xgmi_set_pstate(adev, 0);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
return 0;
}
@@ -3020,6 +3052,13 @@ fence_driver_init:
DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
}
+ /*
+ * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
+ * Otherwise the mgpu fan boost feature will be skipped due to the
+ * gpu instance is counted less.
+ */
+ amdgpu_register_gpu_instance(adev);
+
/* enable clockgating, etc. after ib tests, etc. since some blocks require
* explicit gating rather than handling it automatically.
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 1d6ce018c8f2..557be89421a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1001,6 +1001,7 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
{0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
{0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
/* Renoir */
{0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 459aa9059542..a74ecd449775 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -267,6 +267,7 @@ struct amdgpu_gfx {
uint32_t mec2_feature_version;
bool mec_fw_write_wait;
bool me_fw_write_wait;
+ bool cp_fw_write_wait;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsigned num_gfx_rings;
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 555d8e57fae9..406736a1bd3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -127,18 +127,48 @@ struct amdgpu_xgmi {
};
struct amdgpu_gmc {
+ /* FB's physical address in MMIO space (for CPU to
+ * map FB). This is different compared to the agp/
+ * gart/vram_start/end field as the later is from
+ * GPU's view and aper_base is from CPU's view.
+ */
resource_size_t aper_size;
resource_size_t aper_base;
/* for some chips with <= 32MB we need to lie
* about vram size near mc fb location */
u64 mc_vram_size;
u64 visible_vram_size;
+ /* AGP aperture start and end in MC address space
+ * Driver find a hole in the MC address space
+ * to place AGP by setting MC_VM_AGP_BOT/TOP registers
+ * Under VMID0, logical address == MC address. AGP
+ * aperture maps to physical bus or IOVA addressed.
+ * AGP aperture is used to simulate FB in ZFB case.
+ * AGP aperture is also used for page table in system
+ * memory (mainly for APU).
+ *
+ */
u64 agp_size;
u64 agp_start;
u64 agp_end;
+ /* GART aperture start and end in MC address space
+ * Driver find a hole in the MC address space
+ * to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR
+ * registers
+ * Under VMID0, logical address inside GART aperture will
+ * be translated through gpuvm gart page table to access
+ * paged system memory
+ */
u64 gart_size;
u64 gart_start;
u64 gart_end;
+ /* Frame buffer aperture of this GPU device. Different from
+ * fb_start (see below), this only covers the local GPU device.
+ * Driver get fb_start from MC_VM_FB_LOCATION_BASE (set by vbios)
+ * and calculate vram_start of this local device by adding an
+ * offset inside the XGMI hive.
+ * Under VMID0, logical address == MC address
+ */
u64 vram_start;
u64 vram_end;
/* FB region , it's same as local vram region in single GPU, in XGMI
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 6f3b03f6224f..30d540d23b77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -311,7 +311,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
drm_irq_uninstall(adev->ddev);
adev->irq.installed = false;
if (adev->irq.msi_enabled)
- pci_disable_msi(adev->pdev);
+ pci_free_irq_vectors(adev->pdev);
if (!amdgpu_device_has_dc_support(adev))
flush_work(&adev->hotplug_work);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 137a8573d556..82c46c4faaad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -190,7 +190,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
pm_runtime_put_autosuspend(dev->dev);
}
- amdgpu_register_gpu_instance(adev);
out:
if (r) {
/* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index dab90c280476..404483437bd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -220,7 +220,7 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
* As their names indicate, inject operation will write the
* value to the address.
*
- * Second member: struct ras_debug_if::op.
+ * The second member: struct ras_debug_if::op.
* It has three kinds of operations.
*
* - 0: disable RAS on the block. Take ::head as its data.
@@ -228,14 +228,20 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
* - 2: inject errors on the block. Take ::inject as its data.
*
* How to use the interface?
- * programs:
- * copy the struct ras_debug_if in your codes and initialize it.
- * write the struct to the control node.
+ *
+ * Programs
+ *
+ * Copy the struct ras_debug_if in your codes and initialize it.
+ * Write the struct to the control node.
+ *
+ * Shells
*
* .. code-block:: bash
*
* echo op block [error [sub_block address value]] > .../ras/ras_ctrl
*
+ * Parameters:
+ *
* op: disable, enable, inject
* disable: only block is needed
* enable: block and error are needed
@@ -265,8 +271,10 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
*
* .. note::
- * Operation is only allowed on blocks which are supported.
+ * Operations are only allowed on blocks which are supported.
* Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
+ * to see which blocks support RAS on a particular asic.
+ *
*/
static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
@@ -322,7 +330,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
* DOC: AMDGPU RAS debugfs EEPROM table reset interface
*
* Some boards contain an EEPROM which is used to persistently store a list of
- * bad pages containing ECC errors detected in vram. This interface provides
+ * bad pages which experiences ECC errors in vram. This interface provides
* a way to reset the EEPROM, e.g., after testing error injection.
*
* Usage:
@@ -362,7 +370,7 @@ static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
/**
* DOC: AMDGPU RAS sysfs Error Count Interface
*
- * It allows user to read the error count for each IP block on the gpu through
+ * It allows the user to read the error count for each IP block on the gpu through
* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
*
* It outputs the multiple lines which report the uncorrected (ue) and corrected
@@ -1027,6 +1035,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
}
/* sysfs end */
+/**
+ * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
+ *
+ * Normally when there is an uncorrectable error, the driver will reset
+ * the GPU to recover. However, in the event of an unrecoverable error,
+ * the driver provides an interface to reboot the system automatically
+ * in that event.
+ *
+ * The following file in debugfs provides that interface:
+ * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ * echo true > .../ras/auto_reboot
+ *
+ */
/* debugfs begin */
static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index b66d29d5ffa2..b158230af8db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -138,6 +138,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
}
dma_fence_put(fence);
+ fence = NULL;
r = amdgpu_bo_kmap(vram_obj, &vram_map);
if (r) {
@@ -183,6 +184,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
}
dma_fence_put(fence);
+ fence = NULL;
r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index d5d916169226..61d9b7774d42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1906,9 +1906,6 @@ void amdgpu_ttm_late_init(struct amdgpu_device *adev)
void *stolen_vga_buf;
/* return the VGA stolen memory (if any) back to VRAM */
amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);
-
- /* return the IP Discovery TMR memory back to VRAM */
- amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
}
/**
@@ -1921,7 +1918,10 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
amdgpu_ttm_debugfs_fini(adev);
amdgpu_ttm_training_reserve_vram_fini(adev);
+ /* return the IP Discovery TMR memory back to VRAM */
+ amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
amdgpu_ttm_fw_reserve_vram_fini(adev);
+
if (adev->mman.aper_base_kaddr)
iounmap(adev->mman.aper_base_kaddr);
adev->mman.aper_base_kaddr = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e775f271f1ed..598c24505c73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1418,6 +1418,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
uint64_t incr, entry_end, pe_start;
struct amdgpu_bo *pt;
+ /* make sure that the page tables covering the address range are
+ * actually allocated
+ */
r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor,
params->direct);
if (r)
@@ -1491,7 +1494,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
} while (frag_start < entry_end);
if (amdgpu_vm_pt_descendant(adev, &cursor)) {
- /* Free all child entries */
+ /* Free all child entries.
+ * Update the tables with the flags and addresses and free up subsequent
+ * tables in the case of huge pages or freed up areas.
+ * This is the maximum you can free, because all other page tables are not
+ * completely covered by the range and so potentially still in use.
+ */
while (cursor.pfn < frag_start) {
amdgpu_vm_free_pts(adev, params->vm, &cursor);
amdgpu_vm_pt_next(adev, &cursor);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 00371713c671..61d13d8b7b20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -274,22 +274,55 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
{
int ret = 0;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+ struct amdgpu_device *tmp_adev;
+ bool update_hive_pstate = true;
+ bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20;
if (!hive)
return 0;
- if (hive->pstate == pstate)
- return 0;
+ mutex_lock(&hive->hive_lock);
+
+ if (hive->pstate == pstate) {
+ adev->pstate = is_high_pstate ? pstate : adev->pstate;
+ goto out;
+ }
dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
if (is_support_sw_smu_xgmi(adev))
ret = smu_set_xgmi_pstate(&adev->smu, pstate);
- if (ret)
+ else if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_xgmi_pstate)
+ ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle,
+ pstate);
+
+ if (ret) {
dev_err(adev->dev,
"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.node_id,
adev->gmc.xgmi.hive_id, ret);
+ goto out;
+ }
+
+ /* Update device pstate */
+ adev->pstate = pstate;
+
+ /*
+ * Update the hive pstate only all devices of the hive
+ * are in the same pstate
+ */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ if (tmp_adev->pstate != adev->pstate) {
+ update_hive_pstate = false;
+ break;
+ }
+ }
+ if (update_hive_pstate || is_high_pstate)
+ hive->pstate = pstate;
+
+out:
+ mutex_unlock(&hive->hive_lock);
return ret;
}
@@ -364,6 +397,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
+ /* Set default device pstate */
+ adev->pstate = -1;
+
top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index e7bab4f094b3..a93dd3dc0902 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -564,6 +564,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
kfree(adev->gfx.rlc.register_list_format);
}
+static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+ adev->gfx.cp_fw_write_wait = false;
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_NAVI14:
+ if ((adev->gfx.me_fw_version >= 0x00000046) &&
+ (adev->gfx.me_feature_version >= 27) &&
+ (adev->gfx.pfp_fw_version >= 0x00000068) &&
+ (adev->gfx.pfp_feature_version >= 27) &&
+ (adev->gfx.mec_fw_version >= 0x0000005b) &&
+ (adev->gfx.mec_feature_version >= 27))
+ adev->gfx.cp_fw_write_wait = true;
+ break;
+ default:
+ break;
+ }
+
+ if (adev->gfx.cp_fw_write_wait == false)
+ DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
+ GRBM requires 1-cycle delay in cp firmware\n");
+}
+
+
static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
const struct rlc_firmware_header_v2_1 *rlc_hdr;
@@ -832,6 +858,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
}
}
+ gfx_v10_0_check_fw_write_wait(adev);
out:
if (err) {
dev_err(adev->dev,
@@ -4766,6 +4793,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
+static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ struct amdgpu_device *adev = ring->adev;
+ bool fw_version_ok = false;
+
+ fw_version_ok = adev->gfx.cp_fw_write_wait;
+
+ if (fw_version_ok)
+ gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+ else
+ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+ ref, mask);
+}
+
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -5156,6 +5201,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_tmz = gfx_v10_0_ring_emit_tmz,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -5189,6 +5235,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -5219,6 +5266,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
.emit_rreg = gfx_v10_0_ring_emit_rreg,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 39297baedfb4..3ebd5c20dfd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -986,6 +986,13 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
adev->gfx.me_fw_write_wait = false;
adev->gfx.mec_fw_write_wait = false;
+ if ((adev->gfx.mec_fw_version < 0x000001a5) ||
+ (adev->gfx.mec_feature_version < 46) ||
+ (adev->gfx.pfp_fw_version < 0x000000b7) ||
+ (adev->gfx.pfp_feature_version < 46))
+ DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
+ GRBM requires 1-cycle delay in cp firmware\n");
+
switch (adev->asic_type) {
case CHIP_VEGA10:
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
@@ -1057,6 +1064,12 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_RLC_SMU_HS;
break;
+ case CHIP_RENOIR:
+ if (adev->pm.pp_feature & PP_GFXOFF_MASK)
+ adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_RLC_SMU_HS;
+ break;
default:
break;
}
@@ -2725,7 +2738,10 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
* And it's needed by gfxoff feature.
*/
if (adev->gfx.rlc.is_rlc_v2_1) {
- gfx_v9_1_init_rlc_save_restore_list(adev);
+ if (adev->asic_type == CHIP_VEGA12 ||
+ (adev->asic_type == CHIP_RAVEN &&
+ adev->rev_id >= 8))
+ gfx_v9_1_init_rlc_save_restore_list(adev);
gfx_v9_0_enable_save_restore_machine(adev);
}
@@ -3876,9 +3892,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
uint64_t clock;
mutex_lock(&adev->gfx.gpu_clock_mutex);
- WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
- clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
- ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
+ uint32_t tmp, lsb, msb, i = 0;
+ do {
+ if (i != 0)
+ udelay(1);
+ tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
+ lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
+ msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
+ i++;
+ } while (unlikely(tmp != msb) && (i < adev->usec_timeout));
+ clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
+ } else {
+ WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ }
mutex_unlock(&adev->gfx.gpu_clock_mutex);
return clock;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 6e1b25bd1fe7..27f68d32bfec 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
upper_32_bits(pd_addr));
- amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
- /* wait for the invalidate to complete */
- amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
- 1 << vmid, 1 << vmid);
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+ hub->vm_inv_eng0_ack + eng,
+ req, 1 << vmid);
return pd_addr;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 657970f9ebfb..2c5adfe803a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -219,6 +219,15 @@ static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid)
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index a55a2e83fb19..af68f9815f28 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -539,6 +539,16 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
+static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+
+ /* TODO
+ * dummy implement for pcie_replay_count sysfs interface
+ * */
+
+ return 0;
+}
+
static void nv_init_doorbell_index(struct amdgpu_device *adev)
{
adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
@@ -586,6 +596,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
.need_full_reset = &nv_need_full_reset,
.get_pcie_usage = &nv_get_pcie_usage,
.need_reset_on_init = &nv_need_reset_on_init,
+ .get_pcie_replay_count = &nv_get_pcie_replay_count,
};
static int nv_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index b8fdb192f6d6..f4ad2990f973 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1173,6 +1173,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
}
+static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
static int sdma_v5_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1588,7 +1598,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
6 + /* sdma_v5_0_ring_emit_pipeline_sync */
/* sdma_v5_0_ring_emit_vm_flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
.emit_ib = sdma_v5_0_ring_emit_ib,
@@ -1602,6 +1612,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
.pad_ib = sdma_v5_0_ring_pad_ib,
.emit_wreg = sdma_v5_0_ring_emit_wreg,
.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
.preempt_ib = sdma_v5_0_ring_preempt_ib,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 741b564b4aa5..8e1640bc07af 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1145,7 +1145,9 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_VCN_MGCG;
- adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
+ adev->pg_flags = AMD_PG_SUPPORT_SDMA |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG;
} else if (adev->pdev->device == 0x15d8) {
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
@@ -1188,7 +1190,9 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_VCN_MGCG;
- adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
+ adev->pg_flags = AMD_PG_SUPPORT_SDMA |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG;
}
break;
case CHIP_ARCTURUS:
@@ -1233,11 +1237,6 @@ static int soc15_common_early_init(void *handle)
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG;
adev->external_rev_id = adev->rev_id + 0x91;
-
- if (adev->pm.pp_feature & PP_GFXOFF_MASK)
- adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
- AMD_PG_SUPPORT_CP |
- AMD_PG_SUPPORT_RLC_SMU_HS;
break;
default:
/* FIXME: not supported yet */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 0a6173d727e3..caba9ecac723 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4239,8 +4239,8 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec
result = MODE_OK;
else
DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d\n",
- mode->vdisplay,
mode->hdisplay,
+ mode->vdisplay,
mode->clock,
dc_result);
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
index 985633c08a26..26c6d735cdc7 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
@@ -24,15 +24,20 @@
# It calculates Bandwidth and Watermarks values for HW programming
#
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+calcs_ccflags := -mhard-float -msse
-calcs_ccflags := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+calcs_ccflags += -mpreferred-stack-boundary=4
+else
calcs_ccflags += -msse2
endif
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index cc94c1a73daa..12ba6fdf89b7 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -3027,15 +3027,6 @@ void core_link_enable_stream(
CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
COLOR_DEPTH_UNDEFINED);
- /* This second call is needed to reconfigure the DIG
- * as a workaround for the incorrect value being applied
- * from transmitter control.
- */
- if (!dc_is_virtual_signal(pipe_ctx->stream->signal))
- stream->link->link_enc->funcs->setup(
- stream->link->link_enc,
- pipe_ctx->stream->signal);
-
#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
if (pipe_ctx->stream->timing.flags.DSC) {
if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index ddb8d5649e79..63f3bddba7da 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -10,15 +10,20 @@ ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
DCN20 += dcn20_dsc.o
endif
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse
-CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4
+else
CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -msse2
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 924c2e303588..bbd1c98564be 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -1152,6 +1152,11 @@ struct stream_encoder *dcn20_stream_encoder_create(
if (!enc1)
return NULL;
+ if (ASICREV_IS_NAVI14_M(ctx->asic_id.hw_internal_rev)) {
+ if (eng_id >= ENGINE_ID_DIGD)
+ eng_id++;
+ }
+
dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id,
&stream_enc_regs[eng_id],
&se_shift, &se_mask);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
index 19fabac13c65..14113ccf498d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
@@ -3,15 +3,20 @@
DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o dcn21_hwseq.o dcn21_link_encoder.o
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse
-CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4
+else
CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 5b2a65b42403..8df251626e22 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -24,15 +24,20 @@
# It provides the general basic services required by other DAL
# subcomponents.
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+dml_ccflags := -mhard-float -msse
-dml_ccflags := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+dml_ccflags += -mpreferred-stack-boundary=4
+else
dml_ccflags += -msse2
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
index b456cd23c6fa..970737217e53 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
@@ -1,15 +1,20 @@
#
# Makefile for the 'dsc' sub-component of DAL.
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+dsc_ccflags := -mhard-float -msse
-dsc_ccflags := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+dsc_ccflags += -mpreferred-stack-boundary=4
+else
dsc_ccflags += -msse2
endif
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 5902f80d1fce..a7f92d0b3a90 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -220,6 +220,9 @@ enum pp_df_cstate {
((group) << PP_GROUP_SHIFT | (block) << PP_BLOCK_SHIFT | \
(support) << PP_STATE_SUPPORT_SHIFT | (state) << PP_STATE_SHIFT)
+#define XGMI_MODE_PSTATE_D3 0
+#define XGMI_MODE_PSTATE_D0 1
+
struct seq_file;
enum amd_pp_clock_type;
struct amd_pp_simple_clock_info;
@@ -318,6 +321,7 @@ struct amd_pm_funcs {
int (*set_ppfeature_status)(void *handle, uint64_t ppfeature_masks);
int (*asic_reset_mode_2)(void *handle);
int (*set_df_cstate)(void *handle, enum pp_df_cstate state);
+ int (*set_xgmi_pstate)(void *handle, uint32_t pstate);
};
#endif
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index f4ff15378e61..7932eb163a00 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -969,6 +969,14 @@ static int pp_dpm_switch_power_profile(void *handle,
workload = hwmgr->workload_setting[index];
}
+ if (type == PP_SMC_POWER_PROFILE_COMPUTE &&
+ hwmgr->hwmgr_func->disable_power_features_for_compute_performance) {
+ if (hwmgr->hwmgr_func->disable_power_features_for_compute_performance(hwmgr, en)) {
+ mutex_unlock(&hwmgr->smu_lock);
+ return -EINVAL;
+ }
+ }
+
if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
hwmgr->hwmgr_func->set_power_profile_mode(hwmgr, &workload, 0);
mutex_unlock(&hwmgr->smu_lock);
@@ -1566,6 +1574,23 @@ static int pp_set_df_cstate(void *handle, enum pp_df_cstate state)
return 0;
}
+static int pp_set_xgmi_pstate(void *handle, uint32_t pstate)
+{
+ struct pp_hwmgr *hwmgr = handle;
+
+ if (!hwmgr)
+ return -EINVAL;
+
+ if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_xgmi_pstate)
+ return 0;
+
+ mutex_lock(&hwmgr->smu_lock);
+ hwmgr->hwmgr_func->set_xgmi_pstate(hwmgr, pstate);
+ mutex_unlock(&hwmgr->smu_lock);
+
+ return 0;
+}
+
static const struct amd_pm_funcs pp_dpm_funcs = {
.load_firmware = pp_dpm_load_fw,
.wait_for_fw_loading_complete = pp_dpm_fw_loading_complete,
@@ -1625,4 +1650,5 @@ static const struct amd_pm_funcs pp_dpm_funcs = {
.asic_reset_mode_2 = pp_asic_reset_mode_2,
.smu_i2c_bus_access = pp_smu_i2c_bus_access,
.set_df_cstate = pp_set_df_cstate,
+ .set_xgmi_pstate = pp_set_xgmi_pstate,
};
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 4e468b0272c3..1e2da4d37567 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -383,14 +383,25 @@ bool smu_clk_dpm_is_enabled(struct smu_context *smu, enum smu_clk_type clk_type)
return true;
}
-
+/**
+ * smu_dpm_set_power_gate - power gate/ungate the specific IP block
+ *
+ * @smu: smu_context pointer
+ * @block_type: the IP block to power gate/ungate
+ * @gate: to power gate if true, ungate otherwise
+ *
+ * This API uses no smu->mutex lock protection due to:
+ * 1. It is either called by other IP block(gfx/sdma/vcn/uvd/vce).
+ * This is guarded to be race condition free by the caller.
+ * 2. Or get called on user setting request of power_dpm_force_performance_level.
+ * Under this case, the smu->mutex lock protection is already enforced on
+ * the parent API smu_force_performance_level of the call path.
+ */
int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
bool gate)
{
int ret = 0;
- mutex_lock(&smu->mutex);
-
switch (block_type) {
case AMD_IP_BLOCK_TYPE_UVD:
ret = smu_dpm_set_uvd_enable(smu, gate);
@@ -408,8 +419,6 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
break;
}
- mutex_unlock(&smu->mutex);
-
return ret;
}
@@ -526,7 +535,7 @@ bool is_support_sw_smu(struct amdgpu_device *adev)
bool is_support_sw_smu_xgmi(struct amdgpu_device *adev)
{
- if (amdgpu_dpm != 1)
+ if (!is_support_sw_smu(adev))
return false;
if (adev->asic_type == CHIP_VEGA20)
@@ -705,6 +714,9 @@ static int smu_set_funcs(struct amdgpu_device *adev)
{
struct smu_context *smu = &adev->smu;
+ if (adev->pm.pp_feature & PP_OVERDRIVE_MASK)
+ smu->od_enabled = true;
+
switch (adev->asic_type) {
case CHIP_VEGA20:
vega20_set_ppt_funcs(smu);
@@ -716,6 +728,8 @@ static int smu_set_funcs(struct amdgpu_device *adev)
break;
case CHIP_ARCTURUS:
arcturus_set_ppt_funcs(smu);
+ /* OD is not supported on Arcturus */
+ smu->od_enabled =false;
break;
case CHIP_RENOIR:
renoir_set_ppt_funcs(smu);
@@ -724,9 +738,6 @@ static int smu_set_funcs(struct amdgpu_device *adev)
return -EINVAL;
}
- if (adev->pm.pp_feature & PP_OVERDRIVE_MASK)
- smu->od_enabled = true;
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 4ea63a2e17da..d71a492c87a3 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -3689,6 +3689,13 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr,
PP_ASSERT_WITH_CODE(!result,
"Failed to upload PPtable!", return result);
+ /*
+ * If a custom pp table is loaded, set DPMTABLE_OD_UPDATE_VDDC flag.
+ * That effectively disables AVFS feature.
+ */
+ if(hwmgr->hardcode_pp_table != NULL)
+ data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC;
+
vega10_update_avfs(hwmgr);
/*
@@ -5263,6 +5270,59 @@ static int vega10_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_
return 0;
}
+static int vega10_disable_power_features_for_compute_performance(struct pp_hwmgr *hwmgr, bool disable)
+{
+ struct vega10_hwmgr *data = hwmgr->backend;
+ uint32_t feature_mask = 0;
+
+ if (disable) {
+ feature_mask |= data->smu_features[GNLD_ULV].enabled ?
+ data->smu_features[GNLD_ULV].smu_feature_bitmap : 0;
+ feature_mask |= data->smu_features[GNLD_DS_GFXCLK].enabled ?
+ data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0;
+ feature_mask |= data->smu_features[GNLD_DS_SOCCLK].enabled ?
+ data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0;
+ feature_mask |= data->smu_features[GNLD_DS_LCLK].enabled ?
+ data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0;
+ feature_mask |= data->smu_features[GNLD_DS_DCEFCLK].enabled ?
+ data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0;
+ } else {
+ feature_mask |= (!data->smu_features[GNLD_ULV].enabled) ?
+ data->smu_features[GNLD_ULV].smu_feature_bitmap : 0;
+ feature_mask |= (!data->smu_features[GNLD_DS_GFXCLK].enabled) ?
+ data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0;
+ feature_mask |= (!data->smu_features[GNLD_DS_SOCCLK].enabled) ?
+ data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0;
+ feature_mask |= (!data->smu_features[GNLD_DS_LCLK].enabled) ?
+ data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0;
+ feature_mask |= (!data->smu_features[GNLD_DS_DCEFCLK].enabled) ?
+ data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0;
+ }
+
+ if (feature_mask)
+ PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr,
+ !disable, feature_mask),
+ "enable/disable power features for compute performance Failed!",
+ return -EINVAL);
+
+ if (disable) {
+ data->smu_features[GNLD_ULV].enabled = false;
+ data->smu_features[GNLD_DS_GFXCLK].enabled = false;
+ data->smu_features[GNLD_DS_SOCCLK].enabled = false;
+ data->smu_features[GNLD_DS_LCLK].enabled = false;
+ data->smu_features[GNLD_DS_DCEFCLK].enabled = false;
+ } else {
+ data->smu_features[GNLD_ULV].enabled = true;
+ data->smu_features[GNLD_DS_GFXCLK].enabled = true;
+ data->smu_features[GNLD_DS_SOCCLK].enabled = true;
+ data->smu_features[GNLD_DS_LCLK].enabled = true;
+ data->smu_features[GNLD_DS_DCEFCLK].enabled = true;
+ }
+
+ return 0;
+
+}
+
static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
.backend_init = vega10_hwmgr_backend_init,
.backend_fini = vega10_hwmgr_backend_fini,
@@ -5330,6 +5390,8 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
.get_ppfeature_status = vega10_get_ppfeature_status,
.set_ppfeature_status = vega10_set_ppfeature_status,
.set_mp1_state = vega10_set_mp1_state,
+ .disable_power_features_for_compute_performance =
+ vega10_disable_power_features_for_compute_performance,
};
int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 9295bd90b792..5bcf0d684151 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -4176,6 +4176,20 @@ static int vega20_set_df_cstate(struct pp_hwmgr *hwmgr,
return ret;
}
+static int vega20_set_xgmi_pstate(struct pp_hwmgr *hwmgr,
+ uint32_t pstate)
+{
+ int ret;
+
+ ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetXgmiMode,
+ pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3);
+ if (ret)
+ pr_err("SetXgmiPstate failed!\n");
+
+ return ret;
+}
+
static const struct pp_hwmgr_func vega20_hwmgr_funcs = {
/* init/fini related */
.backend_init = vega20_hwmgr_backend_init,
@@ -4245,6 +4259,7 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = {
.set_mp1_state = vega20_set_mp1_state,
.smu_i2c_bus_access = vega20_smu_i2c_bus_access,
.set_df_cstate = vega20_set_df_cstate,
+ .set_xgmi_pstate = vega20_set_xgmi_pstate,
};
int vega20_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index bd8c922dfd3e..af977675fd33 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -356,6 +356,9 @@ struct pp_hwmgr_func {
int (*asic_reset)(struct pp_hwmgr *hwmgr, enum SMU_ASIC_RESET_MODE mode);
int (*smu_i2c_bus_access)(struct pp_hwmgr *hwmgr, bool aquire);
int (*set_df_cstate)(struct pp_hwmgr *hwmgr, enum pp_df_cstate state);
+ int (*set_xgmi_pstate)(struct pp_hwmgr *hwmgr, uint32_t pstate);
+ int (*disable_power_features_for_compute_performance)(struct pp_hwmgr *hwmgr,
+ bool disable);
};
struct pp_table_func {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h
index 886b9a21ebd8..a886f0644d24 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h
@@ -159,7 +159,7 @@
//FIXME need updating
// Debug Overrides Bitmask
#define DPM_OVERRIDE_DISABLE_UCLK_PID 0x00000001
-#define DPM_OVERRIDE_ENABLE_VOLT_LINK_VCN_FCLK 0x00000002
+#define DPM_OVERRIDE_DISABLE_VOLT_LINK_VCN_FCLK 0x00000002
// I2C Config Bit Defines
#define I2C_CONTROLLER_ENABLED 1
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
index abd4debb3def..fd6ec9033d06 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
@@ -27,7 +27,7 @@
#define SMU11_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU11_DRIVER_IF_VERSION_VG20 0x13
-#define SMU11_DRIVER_IF_VERSION_ARCT 0x0F
+#define SMU11_DRIVER_IF_VERSION_ARCT 0x10
#define SMU11_DRIVER_IF_VERSION_NV10 0x33
#define SMU11_DRIVER_IF_VERSION_NV14 0x34
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 769f9451d904..354f70978f82 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -207,7 +207,7 @@ static struct smu_11_0_cmn2aisc_mapping navi10_workload_map[PP_SMC_POWER_PROFILE
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT),
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT),
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT),
- WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT),
+ WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT),
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT),
};
diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
index 4a9751971a9d..04daf7e9fe05 100644
--- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
@@ -180,11 +180,13 @@ static int renoir_print_clk_levels(struct smu_context *smu,
int i, size = 0, ret = 0;
uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0;
DpmClocks_t *clk_table = smu->smu_table.clocks_table;
- SmuMetrics_t metrics = {0};
+ SmuMetrics_t metrics;
if (!clk_table || clk_type >= SMU_CLK_COUNT)
return -EINVAL;
+ memset(&metrics, 0, sizeof(metrics));
+
ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
(void *)&metrics, false);
if (ret)
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index bbb74b1d5d80..e859bb1132ac 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -368,6 +368,7 @@ int smu_v11_0_setup_pptable(struct smu_context *smu)
version_major = le16_to_cpu(hdr->header.header_version_major);
version_minor = le16_to_cpu(hdr->header.header_version_minor);
if (version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) {
+ pr_info("use driver provided pptable %d\n", smu->smu_table.boot_values.pp_table_id);
switch (version_minor) {
case 0:
ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size);
@@ -384,6 +385,7 @@ int smu_v11_0_setup_pptable(struct smu_context *smu)
return ret;
} else {
+ pr_info("use vbios provided pptable\n");
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
powerplayinfo);
@@ -1463,16 +1465,13 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu,
return ret;
}
-#define XGMI_STATE_D0 1
-#define XGMI_STATE_D3 0
-
int smu_v11_0_set_xgmi_pstate(struct smu_context *smu,
uint32_t pstate)
{
int ret = 0;
ret = smu_send_smc_msg_with_param(smu,
SMU_MSG_SetXgmiMode,
- pstate ? XGMI_STATE_D0 : XGMI_STATE_D3);
+ pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3);
return ret;
}
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
index 7c8933f987d1..5b21386f558d 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
@@ -221,7 +221,7 @@ static struct smu_11_0_cmn2aisc_mapping vega20_workload_map[PP_SMC_POWER_PROFILE
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT),
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT),
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT),
- WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT),
+ WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT),
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT),
};
diff --git a/drivers/gpu/drm/arc/arcpgu_drv.c b/drivers/gpu/drm/arc/arcpgu_drv.c
index 6b7f791685ec..d6a6692db0ac 100644
--- a/drivers/gpu/drm/arc/arcpgu_drv.c
+++ b/drivers/gpu/drm/arc/arcpgu_drv.c
@@ -14,6 +14,7 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_of.h>
#include <drm/drm_probe_helper.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
@@ -45,7 +46,7 @@ static int arcpgu_load(struct drm_device *drm)
{
struct platform_device *pdev = to_platform_device(drm->dev);
struct arcpgu_drm_private *arcpgu;
- struct device_node *encoder_node;
+ struct device_node *encoder_node = NULL, *endpoint_node = NULL;
struct resource *res;
int ret;
@@ -80,14 +81,23 @@ static int arcpgu_load(struct drm_device *drm)
if (arc_pgu_setup_crtc(drm) < 0)
return -ENODEV;
- /* find the encoder node and initialize it */
- encoder_node = of_parse_phandle(drm->dev->of_node, "encoder-slave", 0);
+ /*
+ * There is only one output port inside each device. It is linked with
+ * encoder endpoint.
+ */
+ endpoint_node = of_graph_get_next_endpoint(pdev->dev.of_node, NULL);
+ if (endpoint_node) {
+ encoder_node = of_graph_get_remote_port_parent(endpoint_node);
+ of_node_put(endpoint_node);
+ }
+
if (encoder_node) {
ret = arcpgu_drm_hdmi_init(drm, encoder_node);
of_node_put(encoder_node);
if (ret < 0)
return ret;
} else {
+ dev_info(drm->dev, "no encoder found. Assumed virtual LCD on simulation platform\n");
ret = arcpgu_drm_sim_init(drm, NULL);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
index d49772de93e0..52648b4008bc 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
@@ -84,7 +84,8 @@ static void komeda_kms_commit_tail(struct drm_atomic_state *old_state)
drm_atomic_helper_commit_modeset_disables(dev, old_state);
- drm_atomic_helper_commit_planes(dev, old_state, 0);
+ drm_atomic_helper_commit_planes(dev, old_state,
+ DRM_PLANE_COMMIT_ACTIVE_ONLY);
drm_atomic_helper_commit_modeset_enables(dev, old_state);
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
index 42bdc63dcffa..52750116aa19 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
@@ -581,8 +581,8 @@ komeda_splitter_validate(struct komeda_splitter *splitter,
}
if (!in_range(&splitter->vsize, dflow->in_h)) {
- DRM_DEBUG_ATOMIC("split in_in: %d exceed the acceptable range.\n",
- dflow->in_w);
+ DRM_DEBUG_ATOMIC("split in_h: %d exceeds the acceptable range.\n",
+ dflow->in_h);
return -EINVAL;
}
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 587052751b48..b191d39c071d 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1571,8 +1571,11 @@ static void commit_tail(struct drm_atomic_state *old_state)
{
struct drm_device *dev = old_state->dev;
const struct drm_mode_config_helper_funcs *funcs;
+ struct drm_crtc_state *new_crtc_state;
+ struct drm_crtc *crtc;
ktime_t start;
s64 commit_time_ms;
+ unsigned int i, new_self_refresh_mask = 0;
funcs = dev->mode_config.helper_private;
@@ -1592,6 +1595,15 @@ static void commit_tail(struct drm_atomic_state *old_state)
drm_atomic_helper_wait_for_dependencies(old_state);
+ /*
+ * We cannot safely access new_crtc_state after
+ * drm_atomic_helper_commit_hw_done() so figure out which crtc's have
+ * self-refresh active beforehand:
+ */
+ for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i)
+ if (new_crtc_state->self_refresh_active)
+ new_self_refresh_mask |= BIT(i);
+
if (funcs && funcs->atomic_commit_tail)
funcs->atomic_commit_tail(old_state);
else
@@ -1600,7 +1612,8 @@ static void commit_tail(struct drm_atomic_state *old_state)
commit_time_ms = ktime_ms_delta(ktime_get(), start);
if (commit_time_ms > 0)
drm_self_refresh_helper_update_avg_times(old_state,
- (unsigned long)commit_time_ms);
+ (unsigned long)commit_time_ms,
+ new_self_refresh_mask);
drm_atomic_helper_commit_cleanup_done(old_state);
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 85bef73a6763..ae5809a1f19a 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -29,7 +29,7 @@
#include <linux/seq_file.h>
#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
-#include <linux/stackdepot.h>
+#include <linux/stacktrace.h>
#include <linux/sort.h>
#include <linux/timekeeping.h>
#include <linux/math64.h>
@@ -1507,12 +1507,12 @@ __dump_topology_ref_history(struct drm_dp_mst_topology_ref_history *history,
ulong *entries;
uint nr_entries;
u64 ts_nsec = entry->ts_nsec;
- u64 rem_nsec = do_div(ts_nsec, 1000000000);
+ u32 rem_nsec = do_div(ts_nsec, 1000000000);
nr_entries = stack_depot_fetch(entry->backtrace, &entries);
stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 4);
- drm_printf(&p, " %d %ss (last at %5llu.%06llu):\n%s",
+ drm_printf(&p, " %d %ss (last at %5llu.%06u):\n%s",
entry->count,
topology_ref_type_to_str(entry->type),
ts_nsec, rem_nsec / 1000, buf);
diff --git a/drivers/gpu/drm/drm_self_refresh_helper.c b/drivers/gpu/drm/drm_self_refresh_helper.c
index 68f4765a5896..dd33fec5aabd 100644
--- a/drivers/gpu/drm/drm_self_refresh_helper.c
+++ b/drivers/gpu/drm/drm_self_refresh_helper.c
@@ -133,29 +133,33 @@ out_drop_locks:
* drm_self_refresh_helper_update_avg_times - Updates a crtc's SR time averages
* @state: the state which has just been applied to hardware
* @commit_time_ms: the amount of time in ms that this commit took to complete
+ * @new_self_refresh_mask: bitmask of crtc's that have self_refresh_active in
+ * new state
*
* Called after &drm_mode_config_funcs.atomic_commit_tail, this function will
* update the average entry/exit self refresh times on self refresh transitions.
* These averages will be used when calculating how long to delay before
* entering self refresh mode after activity.
*/
-void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
- unsigned int commit_time_ms)
+void
+drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
+ unsigned int commit_time_ms,
+ unsigned int new_self_refresh_mask)
{
struct drm_crtc *crtc;
- struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ struct drm_crtc_state *old_crtc_state;
int i;
- for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
- new_crtc_state, i) {
+ for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) {
+ bool new_self_refresh_active = new_self_refresh_mask & BIT(i);
struct drm_self_refresh_data *sr_data = crtc->self_refresh_data;
struct ewma_psr_time *time;
if (old_crtc_state->self_refresh_active ==
- new_crtc_state->self_refresh_active)
+ new_self_refresh_active)
continue;
- if (new_crtc_state->self_refresh_active)
+ if (new_self_refresh_active)
time = &sr_data->entry_avg_ms;
else
time = &sr_data->exit_avg_ms;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 698db540972c..648cf0207309 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -180,6 +180,8 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit)
etnaviv_cmdbuf_get_va(&submit->cmdbuf,
&gpu->mmu_context->cmdbuf_mapping));
+ mutex_unlock(&gpu->mmu_context->lock);
+
/* Reserve space for the bomap */
if (n_bomap_pages) {
bomap_start = bomap = iter.data;
@@ -221,8 +223,6 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit)
obj->base.size);
}
- mutex_unlock(&gpu->mmu_context->lock);
-
etnaviv_core_dump_header(&iter, ETDUMP_BUF_END, iter.data);
dev_coredumpv(gpu->dev, iter.start, iter.data - iter.start, GFP_KERNEL);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
index 043111a1d60c..f8bf488e9d71 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
@@ -155,9 +155,11 @@ static void etnaviv_iommuv2_dump(struct etnaviv_iommu_context *context, void *bu
memcpy(buf, v2_context->mtlb_cpu, SZ_4K);
buf += SZ_4K;
- for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++, buf += SZ_4K)
- if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT)
+ for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++)
+ if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT) {
memcpy(buf, v2_context->stlb_cpu[i], SZ_4K);
+ buf += SZ_4K;
+ }
}
static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
index 35ebae6a1be7..3607d348c298 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
@@ -328,12 +328,23 @@ etnaviv_iommu_context_init(struct etnaviv_iommu_global *global,
ret = etnaviv_cmdbuf_suballoc_map(suballoc, ctx, &ctx->cmdbuf_mapping,
global->memory_base);
- if (ret) {
- global->ops->free(ctx);
- return NULL;
+ if (ret)
+ goto out_free;
+
+ if (global->version == ETNAVIV_IOMMU_V1 &&
+ ctx->cmdbuf_mapping.iova > 0x80000000) {
+ dev_err(global->dev,
+ "command buffer outside valid memory window\n");
+ goto out_unmap;
}
return ctx;
+
+out_unmap:
+ etnaviv_cmdbuf_suballoc_unmap(ctx, &ctx->cmdbuf_mapping);
+out_free:
+ global->ops->free(ctx);
+ return NULL;
}
void etnaviv_iommu_restore(struct etnaviv_gpu *gpu,
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 80815a5229a1..eea79125b3ea 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -36,7 +36,6 @@ config DRM_I915_DEBUG
select DRM_I915_SELFTEST
select DRM_I915_DEBUG_RUNTIME_PM
select DRM_I915_DEBUG_MMIO
- select BROKEN # for prototype uAPI
default n
help
Choose this option to turn on extra driver debugging that may affect
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 1799537a3228..c280b6ae38eb 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -25,7 +25,7 @@ config DRM_I915_HEARTBEAT_INTERVAL
config DRM_I915_PREEMPT_TIMEOUT
int "Preempt timeout (ms, jiffy granularity)"
- default 100 # milliseconds
+ default 640 # milliseconds
help
How long to wait (in milliseconds) for a preemption event to occur
when submitting a new context via execlists. If the current context
diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c
index 9cd6d2348a1e..c2875b10adf9 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -200,6 +200,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc)
crtc_state->update_wm_pre = false;
crtc_state->update_wm_post = false;
crtc_state->fifo_changed = false;
+ crtc_state->preload_luts = false;
crtc_state->wm.need_postvbl_update = false;
crtc_state->fb_bits = 0;
crtc_state->update_planes = 0;
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 0caef2592a7e..ed8c7ce62119 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1273,7 +1273,9 @@ static u8 icl_calc_voltage_level(int cdclk)
static u8 ehl_calc_voltage_level(int cdclk)
{
- if (cdclk > 312000)
+ if (cdclk > 326400)
+ return 3;
+ else if (cdclk > 312000)
return 2;
else if (cdclk > 180000)
return 1;
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
index fa44eb73d088..aa3a063549c3 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -1022,6 +1022,55 @@ void intel_color_commit(const struct intel_crtc_state *crtc_state)
dev_priv->display.color_commit(crtc_state);
}
+static bool intel_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+ struct intel_atomic_state *state =
+ to_intel_atomic_state(new_crtc_state->base.state);
+ const struct intel_crtc_state *old_crtc_state =
+ intel_atomic_get_old_crtc_state(state, crtc);
+
+ return !old_crtc_state->base.gamma_lut &&
+ !old_crtc_state->base.degamma_lut;
+}
+
+static bool chv_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+ struct intel_atomic_state *state =
+ to_intel_atomic_state(new_crtc_state->base.state);
+ const struct intel_crtc_state *old_crtc_state =
+ intel_atomic_get_old_crtc_state(state, crtc);
+
+ /*
+ * CGM_PIPE_MODE is itself single buffered. We'd have to
+ * somehow split it out from chv_load_luts() if we wanted
+ * the ability to preload the CGM LUTs/CSC without tearing.
+ */
+ if (old_crtc_state->cgm_mode || new_crtc_state->cgm_mode)
+ return false;
+
+ return !old_crtc_state->base.gamma_lut;
+}
+
+static bool glk_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+ struct intel_atomic_state *state =
+ to_intel_atomic_state(new_crtc_state->base.state);
+ const struct intel_crtc_state *old_crtc_state =
+ intel_atomic_get_old_crtc_state(state, crtc);
+
+ /*
+ * The hardware degamma is active whenever the pipe
+ * CSC is active. Thus even if the old state has no
+ * software degamma we need to avoid clobbering the
+ * linear hardware degamma mid scanout.
+ */
+ return !old_crtc_state->csc_enable &&
+ !old_crtc_state->base.gamma_lut;
+}
+
int intel_color_check(struct intel_crtc_state *crtc_state)
{
struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
@@ -1165,6 +1214,8 @@ static int i9xx_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = intel_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1217,6 +1268,8 @@ static int chv_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = chv_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1271,6 +1324,8 @@ static int ilk_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = intel_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1328,6 +1383,8 @@ static int ivb_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = intel_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1366,6 +1423,8 @@ static int glk_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = glk_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1415,6 +1474,8 @@ static int icl_color_check(struct intel_crtc_state *crtc_state)
crtc_state->csc_mode = icl_csc_mode(crtc_state);
+ crtc_state->preload_luts = intel_can_preload_luts(crtc_state);
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index b51f244ad7a5..c7c2b349858d 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -593,7 +593,7 @@ struct tgl_dkl_phy_ddi_buf_trans {
u32 dkl_de_emphasis_control;
};
-static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = {
+static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = {
/* VS pre-emp Non-trans mV Pre-emph dB */
{ 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */
{ 0x5, 0x0, 0x03 }, /* 0 1 400mV 3.5 dB */
@@ -607,6 +607,20 @@ static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = {
{ 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */
};
+static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_hdmi_ddi_trans[] = {
+ /* HDMI Preset VS Pre-emph */
+ { 0x7, 0x0, 0x0 }, /* 1 400mV 0dB */
+ { 0x6, 0x0, 0x0 }, /* 2 500mV 0dB */
+ { 0x4, 0x0, 0x0 }, /* 3 650mV 0dB */
+ { 0x2, 0x0, 0x0 }, /* 4 800mV 0dB */
+ { 0x0, 0x0, 0x0 }, /* 5 1000mV 0dB */
+ { 0x0, 0x0, 0x5 }, /* 6 Full -1.5 dB */
+ { 0x0, 0x0, 0x6 }, /* 7 Full -1.8 dB */
+ { 0x0, 0x0, 0x7 }, /* 8 Full -2 dB */
+ { 0x0, 0x0, 0x8 }, /* 9 Full -2.5 dB */
+ { 0x0, 0x0, 0xA }, /* 10 Full -3 dB */
+};
+
static const struct ddi_buf_trans *
bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries)
{
@@ -898,7 +912,7 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por
icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI,
0, &n_entries);
else
- n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations);
+ n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans);
default_entry = n_entries - 1;
} else if (INTEL_GEN(dev_priv) == 11) {
if (intel_phy_is_combo(dev_priv, phy))
@@ -1794,10 +1808,8 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state,
* of Color Encoding Format and Content Color Gamut] while sending
* YCBCR 420, HDR BT.2020 signals we should program MSA MISC1 fields
* which indicate VSC SDP for the Pixel Encoding/Colorimetry Format.
- *
- * FIXME MST doesn't pass in the conn_state
*/
- if (conn_state && intel_dp_needs_vsc_sdp(crtc_state, conn_state))
+ if (intel_dp_needs_vsc_sdp(crtc_state, conn_state))
temp |= DP_MSA_MISC_COLOR_VSC_SDP;
I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp);
@@ -2373,7 +2385,7 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder)
icl_get_combo_buf_trans(dev_priv, encoder->type,
intel_dp->link_rate, &n_entries);
else
- n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations);
+ n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans);
} else if (INTEL_GEN(dev_priv) == 11) {
if (intel_phy_is_combo(dev_priv, phy))
icl_get_combo_buf_trans(dev_priv, encoder->type,
@@ -2825,8 +2837,13 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock,
const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations;
u32 n_entries, val, ln, dpcnt_mask, dpcnt_val;
- n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations);
- ddi_translations = tgl_dkl_phy_ddi_translations;
+ if (encoder->type == INTEL_OUTPUT_HDMI) {
+ n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans);
+ ddi_translations = tgl_dkl_phy_hdmi_ddi_trans;
+ } else {
+ n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans);
+ ddi_translations = tgl_dkl_phy_dp_ddi_trans;
+ }
if (level >= n_entries)
level = n_entries - 1;
@@ -3605,7 +3622,11 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
else
hsw_ddi_pre_enable_dp(encoder, crtc_state, conn_state);
- intel_ddi_set_dp_msa(crtc_state, conn_state);
+ /* MST will call a setting of MSA after an allocating of Virtual Channel
+ * from MST encoder pre_enable callback.
+ */
+ if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST))
+ intel_ddi_set_dp_msa(crtc_state, conn_state);
}
static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 348ce0456696..6f5e3bd13ad1 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -66,6 +66,7 @@
#include "intel_cdclk.h"
#include "intel_color.h"
#include "intel_display_types.h"
+#include "intel_dp_link_training.h"
#include "intel_fbc.h"
#include "intel_fbdev.h"
#include "intel_fifo_underrun.h"
@@ -2528,6 +2529,9 @@ u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv,
* the highest stride limits of them all.
*/
crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A);
+ if (!crtc)
+ return 0;
+
plane = to_intel_plane(crtc->base.primary);
return plane->max_stride(plane, pixel_format, modifier,
@@ -14201,6 +14205,11 @@ static void intel_update_crtc(struct intel_crtc *crtc,
/* vblanks work again, re-enable pipe CRC. */
intel_crtc_enable_pipe_crc(crtc);
} else {
+ if (new_crtc_state->preload_luts &&
+ (new_crtc_state->base.color_mgmt_changed ||
+ new_crtc_state->update_pipe))
+ intel_color_load_luts(new_crtc_state);
+
intel_pre_plane_update(old_crtc_state, new_crtc_state);
if (new_crtc_state->update_pipe)
@@ -14713,6 +14722,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
if (new_crtc_state->base.active &&
!needs_modeset(new_crtc_state) &&
+ !new_crtc_state->preload_luts &&
(new_crtc_state->base.color_mgmt_changed ||
new_crtc_state->update_pipe))
intel_color_load_luts(new_crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 355c50088589..f417e0948001 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -27,7 +27,6 @@
#include <drm/drm_util.h>
#include <drm/i915_drm.h>
-#include "intel_dp_link_training.h"
enum link_m_n_set;
struct dpll;
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 4341bd66a418..1a7334dbe802 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -775,6 +775,7 @@ struct intel_crtc_state {
bool disable_cxsr;
bool update_wm_pre, update_wm_post; /* watermarks are updated */
bool fifo_changed; /* FIFO split is changed */
+ bool preload_luts;
/* Pipe source size (ie. panel fitter input size)
* All planes will be positioned inside this space,
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 9ae5b8b6bbbc..03d1cba0b696 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -331,6 +331,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder,
ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr);
intel_ddi_enable_pipe_clock(pipe_config);
+
+ intel_ddi_set_dp_msa(pipe_config, conn_state);
}
static void intel_mst_enable_dp(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 3d1061470e76..48c960ca12fb 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -234,6 +234,11 @@ static int intelfb_create(struct drm_fb_helper *helper,
info->apertures->ranges[0].base = ggtt->gmadr.start;
info->apertures->ranges[0].size = ggtt->mappable_end;
+ /* Our framebuffer is the entirety of fbdev's system memory */
+ info->fix.smem_start =
+ (unsigned long)(ggtt->gmadr.start + vma->node.start);
+ info->fix.smem_len = vma->node.size;
+
vaddr = i915_vma_pin_iomap(vma);
if (IS_ERR(vaddr)) {
DRM_ERROR("Failed to remap framebuffer into virtual memory\n");
@@ -243,10 +248,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
info->screen_base = vaddr;
info->screen_size = vma->node.size;
- /* Our framebuffer is the entirety of fbdev's system memory */
- info->fix.smem_start = (unsigned long)info->screen_base;
- info->fix.smem_len = info->screen_size;
-
drm_fb_helper_fill_info(info, &ifbdev->helper, sizes);
/* If the object is shmemfs backed, it will have given us zeroed pages.
diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c
index edc41fc40726..72fda0430062 100644
--- a/drivers/gpu/drm/i915/display/intel_sprite.c
+++ b/drivers/gpu/drm/i915/display/intel_sprite.c
@@ -2885,7 +2885,7 @@ struct intel_plane *
skl_universal_plane_create(struct drm_i915_private *dev_priv,
enum pipe pipe, enum plane_id plane_id)
{
- static const struct drm_plane_funcs *plane_funcs;
+ const struct drm_plane_funcs *plane_funcs;
struct intel_plane *plane;
enum drm_plane_type plane_type;
unsigned int supported_rotations;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index de6e55af82cf..4237a2887ff2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -236,6 +236,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
free_engines(rcu_access_pointer(ctx->engines));
mutex_destroy(&ctx->engines_mutex);
+ kfree(ctx->jump_whitelist);
+
if (ctx->timeline)
intel_timeline_put(ctx->timeline);
@@ -366,7 +368,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
if (!ce->timeline)
return NULL;
- rcu_read_lock();
+ mutex_lock(&ce->timeline->mutex);
list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
if (i915_request_completed(rq))
break;
@@ -376,7 +378,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
if (engine)
break;
}
- rcu_read_unlock();
+ mutex_unlock(&ce->timeline->mutex);
return engine;
}
@@ -527,6 +529,9 @@ __create_context(struct drm_i915_private *i915)
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
+ ctx->jump_whitelist = NULL;
+ ctx->jump_whitelist_cmds = 0;
+
spin_lock(&i915->gem.contexts.lock);
list_add_tail(&ctx->link, &i915->gem.contexts.list);
spin_unlock(&i915->gem.contexts.lock);
@@ -722,6 +727,7 @@ int i915_gem_init_contexts(struct drm_i915_private *i915)
void i915_gem_driver_release__contexts(struct drm_i915_private *i915)
{
destroy_kernel_context(&i915->kernel_context);
+ flush_work(&i915->gem.contexts.free_work);
}
static int context_idr_cleanup(int id, void *p, void *data)
@@ -1136,7 +1142,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
if (i915_gem_context_is_closed(ctx)) {
err = -ENOENT;
- goto out;
+ goto unlock;
}
if (vm == rcu_access_pointer(ctx->vm))
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 861d7d92fe9f..3870dd5daaa0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -176,6 +176,13 @@ struct i915_gem_context {
* per vm, which may be one per context or shared with the global GTT)
*/
struct radix_tree_root handles_vma;
+
+ /** jump_whitelist: Bit array for tracking cmds during cmdparsing
+ * Guarded by struct_mutex
+ */
+ unsigned long *jump_whitelist;
+ /** jump_whitelist_cmds: No of cmd slots available */
+ u32 jump_whitelist_cmds;
};
#endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index e4f5c269150a..f0998f1225af 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -298,7 +298,9 @@ static inline u64 gen8_noncanonical_addr(u64 address)
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
- return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
+ return intel_engine_requires_cmd_parser(eb->engine) ||
+ (intel_engine_using_cmd_parser(eb->engine) &&
+ eb->args->batch_len);
}
static int eb_create(struct i915_execbuffer *eb)
@@ -1990,40 +1992,94 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
return 0;
}
-static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
+static struct i915_vma *
+shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *dev_priv = eb->i915;
+ struct i915_vma * const vma = *eb->vma;
+ struct i915_address_space *vm;
+ u64 flags;
+
+ /*
+ * PPGTT backed shadow buffers must be mapped RO, to prevent
+ * post-scan tampering
+ */
+ if (CMDPARSER_USES_GGTT(dev_priv)) {
+ flags = PIN_GLOBAL;
+ vm = &dev_priv->ggtt.vm;
+ } else if (vma->vm->has_read_only) {
+ flags = PIN_USER;
+ vm = vma->vm;
+ i915_gem_object_set_readonly(obj);
+ } else {
+ DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags);
+}
+
+static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
{
struct intel_engine_pool_node *pool;
struct i915_vma *vma;
+ u64 batch_start;
+ u64 shadow_batch_start;
int err;
pool = intel_engine_get_pool(eb->engine, eb->batch_len);
if (IS_ERR(pool))
return ERR_CAST(pool);
- err = intel_engine_cmd_parser(eb->engine,
+ vma = shadow_batch_pin(eb, pool->obj);
+ if (IS_ERR(vma))
+ goto err;
+
+ batch_start = gen8_canonical_addr(eb->batch->node.start) +
+ eb->batch_start_offset;
+
+ shadow_batch_start = gen8_canonical_addr(vma->node.start);
+
+ err = intel_engine_cmd_parser(eb->gem_context,
+ eb->engine,
eb->batch->obj,
- pool->obj,
+ batch_start,
eb->batch_start_offset,
eb->batch_len,
- is_master);
+ pool->obj,
+ shadow_batch_start);
+
if (err) {
- if (err == -EACCES) /* unhandled chained batch */
+ i915_vma_unpin(vma);
+
+ /*
+ * Unsafe GGTT-backed buffers can still be submitted safely
+ * as non-secure.
+ * For PPGTT backing however, we have no choice but to forcibly
+ * reject unsafe buffers
+ */
+ if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES))
+ /* Execute original buffer non-secure */
vma = NULL;
else
vma = ERR_PTR(err);
goto err;
}
- vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0);
- if (IS_ERR(vma))
- goto err;
-
eb->vma[eb->buffer_count] = i915_vma_get(vma);
eb->flags[eb->buffer_count] =
__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
vma->exec_flags = &eb->flags[eb->buffer_count];
eb->buffer_count++;
+ eb->batch_start_offset = 0;
+ eb->batch = vma;
+
+ if (CMDPARSER_USES_GGTT(eb->i915))
+ eb->batch_flags |= I915_DISPATCH_SECURE;
+
+ /* eb->batch_len unchanged */
+
vma->private = pool;
return vma;
@@ -2430,6 +2486,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_i915_gem_exec_object2 *exec,
struct drm_syncobj **fences)
{
+ struct drm_i915_private *i915 = to_i915(dev);
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
struct dma_fence *exec_fence = NULL;
@@ -2441,7 +2498,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
~__EXEC_OBJECT_UNKNOWN_FLAGS);
- eb.i915 = to_i915(dev);
+ eb.i915 = i915;
eb.file = file;
eb.args = args;
if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
@@ -2461,8 +2518,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.batch_flags = 0;
if (args->flags & I915_EXEC_SECURE) {
+ if (INTEL_GEN(i915) >= 11)
+ return -ENODEV;
+
+ /* Return -EPERM to trigger fallback code on old binaries. */
+ if (!HAS_SECURE_BATCHES(i915))
+ return -EPERM;
+
if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ return -EPERM;
eb.batch_flags |= I915_DISPATCH_SECURE;
}
@@ -2539,34 +2603,19 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
+ if (eb.batch_len == 0)
+ eb.batch_len = eb.batch->size - eb.batch_start_offset;
+
if (eb_use_cmdparser(&eb)) {
struct i915_vma *vma;
- vma = eb_parse(&eb, drm_is_current_master(file));
+ vma = eb_parse(&eb);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_vma;
}
-
- if (vma) {
- /*
- * Batch parsed and accepted:
- *
- * Set the DISPATCH_SECURE bit to remove the NON_SECURE
- * bit from MI_BATCH_BUFFER_START commands issued in
- * the dispatch_execbuffer implementations. We
- * specifically don't want that set on batches the
- * command parser has accepted.
- */
- eb.batch_flags |= I915_DISPATCH_SECURE;
- eb.batch_start_offset = 0;
- eb.batch = vma;
- }
}
- if (eb.batch_len == 0)
- eb.batch_len = eb.batch->size - eb.batch_start_offset;
-
/*
* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 1e045c337044..4c72d74d6576 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -646,8 +646,28 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
obj->mm.dirty = false;
for_each_sgt_page(page, sgt_iter, pages) {
- if (obj->mm.dirty)
+ if (obj->mm.dirty && trylock_page(page)) {
+ /*
+ * As this may not be anonymous memory (e.g. shmem)
+ * but exist on a real mapping, we have to lock
+ * the page in order to dirty it -- holding
+ * the page reference is not sufficient to
+ * prevent the inode from being truncated.
+ * Play safe and take the lock.
+ *
+ * However...!
+ *
+ * The mmu-notifier can be invalidated for a
+ * migrate_page, that is alreadying holding the lock
+ * on the page. Such a try_to_unmap() will result
+ * in us calling put_pages() and so recursively try
+ * to lock the page. We avoid that deadlock with
+ * a trylock_page() and in exchange we risk missing
+ * some page dirtying.
+ */
set_page_dirty(page);
+ unlock_page(page);
+ }
mark_page_accessed(page);
put_page(page);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index ee9d2bcd2c13..ef7bc41ffffa 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -310,10 +310,23 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
GEM_BUG_ON(rq->hw_context == ce);
if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
- err = mutex_lock_interruptible_nested(&tl->mutex,
- SINGLE_DEPTH_NESTING);
- if (err)
- return err;
+ /*
+ * Ideally, we just want to insert our foreign fence as
+ * a barrier into the remove context, such that this operation
+ * occurs after all current operations in that context, and
+ * all future operations must occur after this.
+ *
+ * Currently, the timeline->last_request tracking is guarded
+ * by its mutex and so we must obtain that to atomically
+ * insert our barrier. However, since we already hold our
+ * timeline->mutex, we must be careful against potential
+ * inversion if we are the kernel_context as the remote context
+ * will itself poke at the kernel_context when it needs to
+ * unpin. Ergo, if already locked, we drop both locks and
+ * try again (through the magic of userspace repeating EAGAIN).
+ */
+ if (!mutex_trylock(&tl->mutex))
+ return -EAGAIN;
/* Queue this switch after current activity by this context. */
err = i915_active_fence_set(&tl->last_request, rq);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index bc3b72bfa9e3..01765a7ec18f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -100,9 +100,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
static inline struct i915_request *
execlists_active(const struct intel_engine_execlists *execlists)
{
- GEM_BUG_ON(execlists->active - execlists->inflight >
- execlists_num_ports(execlists));
- return READ_ONCE(*execlists->active);
+ return *READ_ONCE(execlists->active);
}
static inline void
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f8113bc756c6..813bd3a610d2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -28,13 +28,13 @@
#include "i915_drv.h"
-#include "gt/intel_gt.h"
-
+#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_engine_user.h"
-#include "intel_context.h"
+#include "intel_gt.h"
+#include "intel_gt_requests.h"
#include "intel_lrc.h"
#include "intel_reset.h"
#include "intel_ring.h"
@@ -616,6 +616,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_execlists(engine);
intel_engine_init_cmd_parser(engine);
intel_engine_init__pm(engine);
+ intel_engine_init_retire(engine);
intel_engine_pool_init(&engine->pool);
@@ -838,6 +839,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
cleanup_status_page(engine);
+ intel_engine_fini_retire(engine);
intel_engine_pool_fini(&engine->pool);
intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
@@ -1372,6 +1374,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
}
execlists_active_lock_bh(execlists);
+ rcu_read_lock();
for (port = execlists->active; (rq = *port); port++) {
char hdr[80];
int len;
@@ -1409,6 +1412,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
if (tl)
intel_timeline_put(tl);
}
+ rcu_read_unlock();
execlists_active_unlock_bh(execlists);
} else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 5051f304705b..06aa14c7aa8c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -141,8 +141,8 @@ void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
{
- cancel_delayed_work(&engine->heartbeat.work);
- i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+ if (cancel_delayed_work(&engine->heartbeat.work))
+ i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
}
void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 3c0f490ff2c7..0e1ad4a4bd97 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -73,8 +73,42 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
+static void
+__queue_and_release_pm(struct i915_request *rq,
+ struct intel_timeline *tl,
+ struct intel_engine_cs *engine)
+{
+ struct intel_gt_timelines *timelines = &engine->gt->timelines;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * We have to serialise all potential retirement paths with our
+ * submission, as we don't want to underflow either the
+ * engine->wakeref.counter or our timeline->active_count.
+ *
+ * Equally, we cannot allow a new submission to start until
+ * after we finish queueing, nor could we allow that submitter
+ * to retire us before we are ready!
+ */
+ spin_lock(&timelines->lock);
+
+ /* Let intel_gt_retire_requests() retire us (acquired under lock) */
+ if (!atomic_fetch_inc(&tl->active_count))
+ list_add_tail(&tl->link, &timelines->active_list);
+
+ /* Hand the request over to HW and so engine_retire() */
+ __i915_request_queue(rq, NULL);
+
+ /* Let new submissions commence (and maybe retire this timeline) */
+ __intel_wakeref_defer_park(&engine->wakeref);
+
+ spin_unlock(&timelines->lock);
+}
+
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
+ struct intel_context *ce = engine->kernel_context;
struct i915_request *rq;
unsigned long flags;
bool result = true;
@@ -98,16 +132,31 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* This should hold true as we can only park the engine after
* retiring the last request, thus all rings should be empty and
* all timelines idle.
+ *
+ * For unlocking, there are 2 other parties and the GPU who have a
+ * stake here.
+ *
+ * A new gpu user will be waiting on the engine-pm to start their
+ * engine_unpark. New waiters are predicated on engine->wakeref.count
+ * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
+ * engine->wakeref.
+ *
+ * The other party is intel_gt_retire_requests(), which is walking the
+ * list of active timelines looking for completions. Meanwhile as soon
+ * as we call __i915_request_queue(), the GPU may complete our request.
+ * Ergo, if we put ourselves on the timelines.active_list
+ * (se intel_timeline_enter()) before we increment the
+ * engine->wakeref.count, we may see the request completion and retire
+ * it causing an undeflow of the engine->wakeref.
*/
- flags = __timeline_mark_lock(engine->kernel_context);
+ flags = __timeline_mark_lock(ce);
+ GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
- rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
+ rq = __i915_request_create(ce, GFP_NOWAIT);
if (IS_ERR(rq))
/* Context switch failed, hope for the best! Maybe reset? */
goto out_unlock;
- intel_timeline_enter(i915_request_timeline(rq));
-
/* Check again on the next retirement. */
engine->wakeref_serial = engine->serial + 1;
i915_request_add_active_barriers(rq);
@@ -116,13 +165,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
__i915_request_commit(rq);
- /* Release our exclusive hold on the engine */
- __intel_wakeref_defer_park(&engine->wakeref);
- __i915_request_queue(rq, NULL);
+ /* Expose ourselves to the world */
+ __queue_and_release_pm(rq, ce->timeline, engine);
result = false;
out_unlock:
- __timeline_mark_unlock(engine->kernel_context, flags);
+ __timeline_mark_unlock(ce, flags);
return result;
}
@@ -177,7 +225,8 @@ static int __engine_park(struct intel_wakeref *wf)
engine->execlists.no_priolist = false;
- intel_gt_pm_put(engine->gt);
+ /* While gt calls i915_vma_parked(), we have to break the lock cycle */
+ intel_gt_pm_put_async(engine->gt);
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 739c50fefcef..24e20344dc22 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -31,6 +31,16 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
intel_wakeref_put(&engine->wakeref);
}
+static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
+{
+ intel_wakeref_put_async(&engine->wakeref);
+}
+
+static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
+{
+ intel_wakeref_unlock_wait(&engine->wakeref);
+}
+
void intel_engine_init__pm(struct intel_engine_cs *engine);
#endif /* INTEL_ENGINE_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index c5d1047a4bc5..17f1f1441efc 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -451,16 +451,25 @@ struct intel_engine_cs {
struct intel_engine_execlists execlists;
+ /*
+ * Keep track of completed timelines on this engine for early
+ * retirement with the goal of quickly enabling powersaving as
+ * soon as the engine is idle.
+ */
+ struct intel_timeline *retire;
+ struct work_struct retire_work;
+
/* status_notifier: list of callbacks for context-switch changes */
struct atomic_notifier_head context_status_notifier;
-#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_USING_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL BIT(5)
#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
unsigned int flags;
/*
@@ -528,9 +537,15 @@ struct intel_engine_cs {
};
static inline bool
-intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
+intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_USING_CMD_PARSER;
+}
+
+static inline bool
+intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
- return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+ return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}
static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 060a27d9af34..a459a42ad5c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -61,6 +61,9 @@ static int __gt_unpark(struct intel_wakeref *wf)
gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
GEM_BUG_ON(!gt->awake);
+ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
+
intel_rps_unpark(&gt->rps);
i915_pmu_gt_unparked(i915);
@@ -86,6 +89,11 @@ static int __gt_park(struct intel_wakeref *wf)
/* Everything switched off, flush any residual interrupt just in case */
intel_synchronize_irq(i915);
+ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) {
+ intel_rc6_ctx_wa_check(&i915->gt.rc6);
+ intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+ }
+
GEM_BUG_ON(!wakeref);
intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
@@ -97,7 +105,6 @@ static int __gt_park(struct intel_wakeref *wf)
static const struct intel_wakeref_ops wf_ops = {
.get = __gt_unpark,
.put = __gt_park,
- .flags = INTEL_WAKEREF_PUT_ASYNC,
};
void intel_gt_pm_init_early(struct intel_gt *gt)
@@ -264,7 +271,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt)
static suspend_state_t pm_suspend_target(void)
{
-#if IS_ENABLED(CONFIG_PM_SLEEP)
+#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP)
return pm_suspend_target_state;
#else
return PM_SUSPEND_TO_IDLE;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index b3e17399be9b..990efc27a4e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -32,6 +32,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt)
intel_wakeref_put(&gt->wakeref);
}
+static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+{
+ intel_wakeref_put_async(&gt->wakeref);
+}
+
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(&gt->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index b73229a84d85..3dc13ecf41bf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -4,6 +4,8 @@
* Copyright © 2019 Intel Corporation
*/
+#include <linux/workqueue.h>
+
#include "i915_drv.h" /* for_each_engine() */
#include "i915_request.h"
#include "intel_gt.h"
@@ -29,6 +31,79 @@ static void flush_submission(struct intel_gt *gt)
intel_engine_flush_submission(engine);
}
+static void engine_retire(struct work_struct *work)
+{
+ struct intel_engine_cs *engine =
+ container_of(work, typeof(*engine), retire_work);
+ struct intel_timeline *tl = xchg(&engine->retire, NULL);
+
+ do {
+ struct intel_timeline *next = xchg(&tl->retire, NULL);
+
+ /*
+ * Our goal here is to retire _idle_ timelines as soon as
+ * possible (as they are idle, we do not expect userspace
+ * to be cleaning up anytime soon).
+ *
+ * If the timeline is currently locked, either it is being
+ * retired elsewhere or about to be!
+ */
+ if (mutex_trylock(&tl->mutex)) {
+ retire_requests(tl);
+ mutex_unlock(&tl->mutex);
+ }
+ intel_timeline_put(tl);
+
+ GEM_BUG_ON(!next);
+ tl = ptr_mask_bits(next, 1);
+ } while (tl);
+}
+
+static bool add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl)
+{
+ struct intel_timeline *first;
+
+ /*
+ * We open-code a llist here to include the additional tag [BIT(0)]
+ * so that we know when the timeline is already on a
+ * retirement queue: either this engine or another.
+ *
+ * However, we rely on that a timeline can only be active on a single
+ * engine at any one time and that add_retire() is called before the
+ * engine releases the timeline and transferred to another to retire.
+ */
+
+ if (READ_ONCE(tl->retire)) /* already queued */
+ return false;
+
+ intel_timeline_get(tl);
+ first = READ_ONCE(engine->retire);
+ do
+ tl->retire = ptr_pack_bits(first, 1, 1);
+ while (!try_cmpxchg(&engine->retire, &first, tl));
+
+ return !first;
+}
+
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl)
+{
+ if (add_retire(engine, tl))
+ schedule_work(&engine->retire_work);
+}
+
+void intel_engine_init_retire(struct intel_engine_cs *engine)
+{
+ INIT_WORK(&engine->retire_work, engine_retire);
+}
+
+void intel_engine_fini_retire(struct intel_engine_cs *engine)
+{
+ flush_work(&engine->retire_work);
+ GEM_BUG_ON(engine->retire);
+}
+
long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
{
struct intel_gt_timelines *timelines = &gt->timelines;
@@ -52,8 +127,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
}
intel_timeline_get(tl);
- GEM_BUG_ON(!tl->active_count);
- tl->active_count++; /* pin the list element */
+ GEM_BUG_ON(!atomic_read(&tl->active_count));
+ atomic_inc(&tl->active_count); /* pin the list element */
spin_unlock_irqrestore(&timelines->lock, flags);
if (timeout > 0) {
@@ -74,16 +149,16 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
/* Resume iteration after dropping lock */
list_safe_reset_next(tl, tn, link);
- if (--tl->active_count)
- active_count += !!rcu_access_pointer(tl->last_request.fence);
- else
+ if (atomic_dec_and_test(&tl->active_count))
list_del(&tl->link);
+ else
+ active_count += !!rcu_access_pointer(tl->last_request.fence);
mutex_unlock(&tl->mutex);
/* Defer the final release to after the spinlock */
if (refcount_dec_and_test(&tl->kref.refcount)) {
- GEM_BUG_ON(tl->active_count);
+ GEM_BUG_ON(atomic_read(&tl->active_count));
list_add(&tl->link, &free);
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
index bd31cbce47e0..d626fb115386 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -7,7 +7,9 @@
#ifndef INTEL_GT_REQUESTS_H
#define INTEL_GT_REQUESTS_H
+struct intel_engine_cs;
struct intel_gt;
+struct intel_timeline;
long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
static inline void intel_gt_retire_requests(struct intel_gt *gt)
@@ -15,6 +17,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt)
intel_gt_retire_requests_timeout(gt, 0);
}
+void intel_engine_init_retire(struct intel_engine_cs *engine);
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl);
+void intel_engine_fini_retire(struct intel_engine_cs *engine);
+
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
void intel_gt_init_requests(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 51aef2a233cb..9fdefbdc3546 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -142,6 +142,7 @@
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
@@ -990,6 +991,59 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
+static void restore_default_state(struct intel_context *ce,
+ struct intel_engine_cs *engine)
+{
+ u32 *regs = ce->lrc_reg_state;
+
+ if (engine->pinned_default_state)
+ memcpy(regs, /* skip restoring the vanilla PPHWSP */
+ engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+ engine->context_size - PAGE_SIZE);
+
+ execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+}
+
+static void reset_active(struct i915_request *rq,
+ struct intel_engine_cs *engine)
+{
+ struct intel_context * const ce = rq->hw_context;
+ u32 head;
+
+ /*
+ * The executing context has been cancelled. We want to prevent
+ * further execution along this context and propagate the error on
+ * to anything depending on its results.
+ *
+ * In __i915_request_submit(), we apply the -EIO and remove the
+ * requests' payloads for any banned requests. But first, we must
+ * rewind the context back to the start of the incomplete request so
+ * that we do not jump back into the middle of the batch.
+ *
+ * We preserve the breadcrumbs and semaphores of the incomplete
+ * requests so that inter-timeline dependencies (i.e other timelines)
+ * remain correctly ordered. And we defer to __i915_request_submit()
+ * so that all asynchronous waits are correctly handled.
+ */
+ GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
+ __func__, engine->name, rq->fence.context, rq->fence.seqno);
+
+ /* On resubmission of the active request, payload will be scrubbed */
+ if (i915_request_completed(rq))
+ head = rq->tail;
+ else
+ head = active_request(ce->timeline, rq)->head;
+ ce->ring->head = intel_ring_wrap(ce->ring, head);
+ intel_ring_update_space(ce->ring);
+
+ /* Scrub the context image to prevent replaying the previous batch */
+ restore_default_state(ce, engine);
+ __execlists_update_reg_state(ce, engine);
+
+ /* We've switched away, so this should be a no-op, but intent matters */
+ ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+}
+
static inline struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
@@ -998,6 +1052,9 @@ __execlists_schedule_in(struct i915_request *rq)
intel_context_get(ce);
+ if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
+ reset_active(rq, engine);
+
if (ce->tag) {
/* Use a fixed tag for OA and friends */
ce->lrc_desc |= (u64)ce->tag << 32;
@@ -1047,71 +1104,29 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
tasklet_schedule(&ve->base.execlists.tasklet);
}
-static void restore_default_state(struct intel_context *ce,
- struct intel_engine_cs *engine)
-{
- u32 *regs = ce->lrc_reg_state;
-
- if (engine->pinned_default_state)
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
- engine->context_size - PAGE_SIZE);
-
- execlists_init_reg_state(regs, ce, engine, ce->ring, false);
-}
-
-static void reset_active(struct i915_request *rq,
- struct intel_engine_cs *engine)
+static inline void
+__execlists_schedule_out(struct i915_request *rq,
+ struct intel_engine_cs * const engine)
{
struct intel_context * const ce = rq->hw_context;
- u32 head;
/*
- * The executing context has been cancelled. We want to prevent
- * further execution along this context and propagate the error on
- * to anything depending on its results.
- *
- * In __i915_request_submit(), we apply the -EIO and remove the
- * requests' payloads for any banned requests. But first, we must
- * rewind the context back to the start of the incomplete request so
- * that we do not jump back into the middle of the batch.
- *
- * We preserve the breadcrumbs and semaphores of the incomplete
- * requests so that inter-timeline dependencies (i.e other timelines)
- * remain correctly ordered. And we defer to __i915_request_submit()
- * so that all asynchronous waits are correctly handled.
+ * NB process_csb() is not under the engine->active.lock and hence
+ * schedule_out can race with schedule_in meaning that we should
+ * refrain from doing non-trivial work here.
*/
- GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
- __func__, engine->name, rq->fence.context, rq->fence.seqno);
- /* On resubmission of the active request, payload will be scrubbed */
- if (i915_request_completed(rq))
- head = rq->tail;
- else
- head = active_request(ce->timeline, rq)->head;
- ce->ring->head = intel_ring_wrap(ce->ring, head);
- intel_ring_update_space(ce->ring);
-
- /* Scrub the context image to prevent replaying the previous batch */
- restore_default_state(ce, engine);
- __execlists_update_reg_state(ce, engine);
-
- /* We've switched away, so this should be a no-op, but intent matters */
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
-}
-
-static inline void
-__execlists_schedule_out(struct i915_request *rq,
- struct intel_engine_cs * const engine)
-{
- struct intel_context * const ce = rq->hw_context;
+ /*
+ * If we have just completed this context, the engine may now be
+ * idle and we want to re-enter powersaving.
+ */
+ if (list_is_last(&rq->link, &ce->timeline->requests) &&
+ i915_request_completed(rq))
+ intel_engine_add_retire(engine, ce->timeline);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
- intel_gt_pm_put(engine->gt);
-
- if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
- reset_active(rq, engine);
+ intel_gt_pm_put_async(engine->gt);
/*
* If this is part of a virtual engine, its next request may
@@ -1931,16 +1946,17 @@ skip_submit:
static void
cancel_port_requests(struct intel_engine_execlists * const execlists)
{
- struct i915_request * const *port, *rq;
+ struct i915_request * const *port;
- for (port = execlists->pending; (rq = *port); port++)
- execlists_schedule_out(rq);
+ for (port = execlists->pending; *port; port++)
+ execlists_schedule_out(*port);
memset(execlists->pending, 0, sizeof(execlists->pending));
- for (port = execlists->active; (rq = *port); port++)
- execlists_schedule_out(rq);
- execlists->active =
- memset(execlists->inflight, 0, sizeof(execlists->inflight));
+ /* Mark the end of active before we overwrite *active */
+ for (port = xchg(&execlists->active, execlists->pending); *port; port++)
+ execlists_schedule_out(*port);
+ WRITE_ONCE(execlists->active,
+ memset(execlists->inflight, 0, sizeof(execlists->inflight)));
}
static inline void
@@ -2093,23 +2109,27 @@ static void process_csb(struct intel_engine_cs *engine)
else
promote = gen8_csb_parse(execlists, buf + 2 * head);
if (promote) {
+ struct i915_request * const *old = execlists->active;
+
+ /* Point active to the new ELSP; prevent overwriting */
+ WRITE_ONCE(execlists->active, execlists->pending);
+ set_timeslice(engine);
+
if (!inject_preempt_hang(execlists))
ring_set_paused(engine, 0);
/* cancel old inflight, prepare for switch */
- trace_ports(execlists, "preempted", execlists->active);
- while (*execlists->active)
- execlists_schedule_out(*execlists->active++);
+ trace_ports(execlists, "preempted", old);
+ while (*old)
+ execlists_schedule_out(*old++);
/* switch pending to inflight */
GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
- execlists->active =
- memcpy(execlists->inflight,
- execlists->pending,
- execlists_num_ports(execlists) *
- sizeof(*execlists->pending));
-
- set_timeslice(engine);
+ WRITE_ONCE(execlists->active,
+ memcpy(execlists->inflight,
+ execlists->pending,
+ execlists_num_ports(execlists) *
+ sizeof(*execlists->pending)));
WRITE_ONCE(execlists->pending[0], NULL);
} else {
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 5ad4a92a9582..700104b90163 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -178,8 +178,13 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
GEN6_RC_CTL_RC6_ENABLE |
rc6_mode);
- set(uncore, GEN9_PG_ENABLE,
- GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
+ /*
+ * WaRsDisableCoarsePowerGating:skl,cnl
+ * - Render/Media PG need to be disabled with RC6.
+ */
+ if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
+ set(uncore, GEN9_PG_ENABLE,
+ GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
}
static void gen8_rc6_enable(struct intel_rc6 *rc6)
@@ -486,6 +491,66 @@ static void rpm_put(struct intel_rc6 *rc6)
rc6->wakeref = false;
}
+static bool intel_rc6_ctx_corrupted(struct intel_rc6 *rc6)
+{
+ return !intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO);
+}
+
+static void intel_rc6_ctx_wa_init(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+
+ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ return;
+
+ if (intel_rc6_ctx_corrupted(rc6)) {
+ DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
+ rc6->ctx_corrupted = true;
+ }
+}
+
+/**
+ * intel_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
+ * @rc6: rc6 state
+ *
+ * Perform any steps needed to re-init the RC6 CTX WA after system resume.
+ */
+void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6)
+{
+ if (rc6->ctx_corrupted && !intel_rc6_ctx_corrupted(rc6)) {
+ DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
+ rc6->ctx_corrupted = false;
+ }
+}
+
+/**
+ * intel_rc6_ctx_wa_check - check for a new RC6 CTX corruption
+ * @rc6: rc6 state
+ *
+ * Check if an RC6 CTX corruption has happened since the last check and if so
+ * disable RC6 and runtime power management.
+*/
+void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+
+ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ return;
+
+ if (rc6->ctx_corrupted)
+ return;
+
+ if (!intel_rc6_ctx_corrupted(rc6))
+ return;
+
+ DRM_NOTE("RC6 context corruption, disabling runtime power management\n");
+
+ intel_rc6_disable(rc6);
+ rc6->ctx_corrupted = true;
+
+ return;
+}
+
static void __intel_rc6_disable(struct intel_rc6 *rc6)
{
struct drm_i915_private *i915 = rc6_to_i915(rc6);
@@ -510,6 +575,8 @@ void intel_rc6_init(struct intel_rc6 *rc6)
if (!rc6_supported(rc6))
return;
+ intel_rc6_ctx_wa_init(rc6);
+
if (IS_CHERRYVIEW(i915))
err = chv_rc6_init(rc6);
else if (IS_VALLEYVIEW(i915))
@@ -544,6 +611,9 @@ void intel_rc6_enable(struct intel_rc6 *rc6)
GEM_BUG_ON(rc6->enabled);
+ if (rc6->ctx_corrupted)
+ return;
+
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
if (IS_CHERRYVIEW(i915))
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h
index 5e6711f36457..1370f6834a4c 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.h
@@ -22,4 +22,7 @@ void intel_rc6_disable(struct intel_rc6 *rc6);
u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg);
u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg);
+void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6);
+void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6);
+
#endif /* INTEL_RC6_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
index 214f354d6ae4..89ad5697a8d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
@@ -23,6 +23,7 @@ struct intel_rc6 {
bool supported : 1;
bool enabled : 1;
bool wakeref : 1;
+ bool ctx_corrupted : 1;
};
#endif /* INTEL_RC6_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index f03e000051c1..c97423a76642 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1114,7 +1114,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
out:
intel_engine_cancel_stop_cs(engine);
reset_finish_engine(engine);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index ece20504d240..374b28f13ca0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -57,9 +57,10 @@ int intel_ring_pin(struct intel_ring *ring)
i915_vma_make_unshrinkable(vma);
- GEM_BUG_ON(ring->vaddr);
- ring->vaddr = addr;
+ /* Discard any unused bytes beyond that submitted to hw. */
+ intel_ring_reset(ring, ring->emit);
+ ring->vaddr = addr;
return 0;
err_ring:
@@ -85,20 +86,14 @@ void intel_ring_unpin(struct intel_ring *ring)
if (!atomic_dec_and_test(&ring->pin_count))
return;
- /* Discard any unused bytes beyond that submitted to hw. */
- intel_ring_reset(ring, ring->emit);
-
i915_vma_unset_ggtt_write(vma);
if (i915_vma_is_map_and_fenceable(vma))
i915_vma_unpin_iomap(vma);
else
i915_gem_object_unpin_map(vma->obj);
- GEM_BUG_ON(!ring->vaddr);
- ring->vaddr = NULL;
-
- i915_vma_unpin(vma);
i915_vma_make_purgeable(vma);
+ i915_vma_unpin(vma);
}
static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 14ad10acd548..649798c184fb 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -282,6 +282,7 @@ void intel_timeline_fini(struct intel_timeline *timeline)
{
GEM_BUG_ON(atomic_read(&timeline->pin_count));
GEM_BUG_ON(!list_empty(&timeline->requests));
+ GEM_BUG_ON(timeline->retire);
if (timeline->hwsp_cacheline)
cacheline_free(timeline->hwsp_cacheline);
@@ -339,15 +340,33 @@ void intel_timeline_enter(struct intel_timeline *tl)
struct intel_gt_timelines *timelines = &tl->gt->timelines;
unsigned long flags;
+ /*
+ * Pretend we are serialised by the timeline->mutex.
+ *
+ * While generally true, there are a few exceptions to the rule
+ * for the engine->kernel_context being used to manage power
+ * transitions. As the engine_park may be called from under any
+ * timeline, it uses the power mutex as a global serialisation
+ * lock to prevent any other request entering its timeline.
+ *
+ * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
+ *
+ * However, intel_gt_retire_request() does not know which engine
+ * it is retiring along and so cannot partake in the engine-pm
+ * barrier, and there we use the tl->active_count as a means to
+ * pin the timeline in the active_list while the locks are dropped.
+ * Ergo, as that is outside of the engine-pm barrier, we need to
+ * use atomic to manipulate tl->active_count.
+ */
lockdep_assert_held(&tl->mutex);
-
GEM_BUG_ON(!atomic_read(&tl->pin_count));
- if (tl->active_count++)
+
+ if (atomic_add_unless(&tl->active_count, 1, 0))
return;
- GEM_BUG_ON(!tl->active_count); /* overflow? */
spin_lock_irqsave(&timelines->lock, flags);
- list_add(&tl->link, &timelines->active_list);
+ if (!atomic_fetch_inc(&tl->active_count))
+ list_add_tail(&tl->link, &timelines->active_list);
spin_unlock_irqrestore(&timelines->lock, flags);
}
@@ -356,14 +375,16 @@ void intel_timeline_exit(struct intel_timeline *tl)
struct intel_gt_timelines *timelines = &tl->gt->timelines;
unsigned long flags;
+ /* See intel_timeline_enter() */
lockdep_assert_held(&tl->mutex);
- GEM_BUG_ON(!tl->active_count);
- if (--tl->active_count)
+ GEM_BUG_ON(!atomic_read(&tl->active_count));
+ if (atomic_add_unless(&tl->active_count, -1, 1))
return;
spin_lock_irqsave(&timelines->lock, flags);
- list_del(&tl->link);
+ if (atomic_dec_and_test(&tl->active_count))
+ list_del(&tl->link);
spin_unlock_irqrestore(&timelines->lock, flags);
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 98d9ee166379..aaf15cbe1ce1 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -42,7 +42,7 @@ struct intel_timeline {
* from the intel_context caller plus internal atomicity.
*/
atomic_t pin_count;
- unsigned int active_count;
+ atomic_t active_count;
const u32 *hwsp_seqno;
struct i915_vma *hwsp_ggtt;
@@ -66,6 +66,9 @@ struct intel_timeline {
*/
struct i915_active_fence last_request;
+ /** A chain of completed timelines ready for early retirement. */
+ struct intel_timeline *retire;
+
/**
* We track the most recent seqno that we wait on in every context so
* that we only have to emit a new await and dependency on a more
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
index 20b9c83f43ad..cbf6b0735272 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -51,11 +51,12 @@ static int live_engine_pm(void *arg)
pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
engine->name, p->name);
else
- intel_engine_pm_put(engine);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
+ intel_engine_pm_put_async(engine);
p->critical_section_end();
- /* engine wakeref is sync (instant) */
+ intel_engine_pm_flush(engine);
+
if (intel_engine_pm_is_awake(engine)) {
pr_err("%s is still awake after flushing pm\n",
engine->name);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 019ae6486e8d..3ee4a4e7689d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -554,6 +554,13 @@ int intel_guc_suspend(struct intel_guc *guc)
};
/*
+ * If GuC communication is enabled but submission is not supported,
+ * we do not need to suspend the GuC.
+ */
+ if (!intel_guc_submission_is_enabled(guc))
+ return 0;
+
+ /*
* The ENTER_S_STATE action queues the save/restore operation in GuC FW
* and then returns, so waiting on the H2G is not enough to guarantee
* GuC is done. When all the processing is done, GuC writes
@@ -610,6 +617,14 @@ int intel_guc_resume(struct intel_guc *guc)
GUC_POWER_D0,
};
+ /*
+ * If GuC communication is enabled but submission is not supported,
+ * we do not need to resume the GuC but we do need to enable the
+ * GuC communication on resume (above).
+ */
+ if (!intel_guc_submission_is_enabled(guc))
+ return 0;
+
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 207383dda84d..dca15ace88f6 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -91,14 +91,15 @@ static void debug_active_init(struct i915_active *ref)
static void debug_active_activate(struct i915_active *ref)
{
- lockdep_assert_held(&ref->mutex);
+ spin_lock_irq(&ref->tree_lock);
if (!atomic_read(&ref->count)) /* before the first inc */
debug_object_activate(ref, &active_debug_desc);
+ spin_unlock_irq(&ref->tree_lock);
}
static void debug_active_deactivate(struct i915_active *ref)
{
- lockdep_assert_held(&ref->mutex);
+ lockdep_assert_held(&ref->tree_lock);
if (!atomic_read(&ref->count)) /* after the last dec */
debug_object_deactivate(ref, &active_debug_desc);
}
@@ -128,29 +129,22 @@ __active_retire(struct i915_active *ref)
{
struct active_node *it, *n;
struct rb_root root;
- bool retire = false;
+ unsigned long flags;
- lockdep_assert_held(&ref->mutex);
GEM_BUG_ON(i915_active_is_idle(ref));
/* return the unused nodes to our slabcache -- flushing the allocator */
- if (atomic_dec_and_test(&ref->count)) {
- debug_active_deactivate(ref);
- root = ref->tree;
- ref->tree = RB_ROOT;
- ref->cache = NULL;
- retire = true;
- }
-
- mutex_unlock(&ref->mutex);
- if (!retire)
+ if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
return;
GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
- rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
- GEM_BUG_ON(i915_active_fence_isset(&it->base));
- kmem_cache_free(global.slab_cache, it);
- }
+ debug_active_deactivate(ref);
+
+ root = ref->tree;
+ ref->tree = RB_ROOT;
+ ref->cache = NULL;
+
+ spin_unlock_irqrestore(&ref->tree_lock, flags);
/* After the final retire, the entire struct may be freed */
if (ref->retire)
@@ -158,6 +152,11 @@ __active_retire(struct i915_active *ref)
/* ... except if you wait on it, you must manage your own references! */
wake_up_var(ref);
+
+ rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
+ GEM_BUG_ON(i915_active_fence_isset(&it->base));
+ kmem_cache_free(global.slab_cache, it);
+ }
}
static void
@@ -169,7 +168,6 @@ active_work(struct work_struct *wrk)
if (atomic_add_unless(&ref->count, -1, 1))
return;
- mutex_lock(&ref->mutex);
__active_retire(ref);
}
@@ -180,9 +178,7 @@ active_retire(struct i915_active *ref)
if (atomic_add_unless(&ref->count, -1, 1))
return;
- /* If we are inside interrupt context (fence signaling), defer */
- if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS ||
- !mutex_trylock(&ref->mutex)) {
+ if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
queue_work(system_unbound_wq, &ref->work);
return;
}
@@ -227,7 +223,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
if (!prealloc)
return NULL;
- mutex_lock(&ref->mutex);
+ spin_lock_irq(&ref->tree_lock);
GEM_BUG_ON(i915_active_is_idle(ref));
parent = NULL;
@@ -257,7 +253,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
out:
ref->cache = node;
- mutex_unlock(&ref->mutex);
+ spin_unlock_irq(&ref->tree_lock);
BUILD_BUG_ON(offsetof(typeof(*node), base));
return &node->base;
@@ -278,8 +274,10 @@ void __i915_active_init(struct i915_active *ref,
if (bits & I915_ACTIVE_MAY_SLEEP)
ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
+ spin_lock_init(&ref->tree_lock);
ref->tree = RB_ROOT;
ref->cache = NULL;
+
init_llist_head(&ref->preallocated_barriers);
atomic_set(&ref->count, 0);
__mutex_init(&ref->mutex, "i915_active", key);
@@ -510,7 +508,7 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
if (RB_EMPTY_ROOT(&ref->tree))
return NULL;
- mutex_lock(&ref->mutex);
+ spin_lock_irq(&ref->tree_lock);
GEM_BUG_ON(i915_active_is_idle(ref));
/*
@@ -575,7 +573,7 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
goto match;
}
- mutex_unlock(&ref->mutex);
+ spin_unlock_irq(&ref->tree_lock);
return NULL;
@@ -583,7 +581,7 @@ match:
rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
if (p == &ref->cache->node)
ref->cache = NULL;
- mutex_unlock(&ref->mutex);
+ spin_unlock_irq(&ref->tree_lock);
return rb_entry(p, struct active_node, node);
}
@@ -664,6 +662,7 @@ unwind:
void i915_active_acquire_barrier(struct i915_active *ref)
{
struct llist_node *pos, *next;
+ unsigned long flags;
GEM_BUG_ON(i915_active_is_idle(ref));
@@ -673,12 +672,13 @@ void i915_active_acquire_barrier(struct i915_active *ref)
* populated by i915_request_add_active_barriers() to point to the
* request that will eventually release them.
*/
- mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
struct active_node *node = barrier_from_ll(pos);
struct intel_engine_cs *engine = barrier_to_engine(node);
struct rb_node **p, *parent;
+ spin_lock_irqsave_nested(&ref->tree_lock, flags,
+ SINGLE_DEPTH_NESTING);
parent = NULL;
p = &ref->tree.rb_node;
while (*p) {
@@ -694,12 +694,12 @@ void i915_active_acquire_barrier(struct i915_active *ref)
}
rb_link_node(&node->node, parent, p);
rb_insert_color(&node->node, &ref->tree);
+ spin_unlock_irqrestore(&ref->tree_lock, flags);
GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
llist_add(barrier_to_ll(node), &engine->barrier_tasks);
intel_engine_pm_put(engine);
}
- mutex_unlock(&ref->mutex);
}
void i915_request_add_active_barriers(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index d89a74c142c6..96aed0ee700a 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -48,6 +48,7 @@ struct i915_active {
atomic_t count;
struct mutex mutex;
+ spinlock_t tree_lock;
struct active_node *cache;
struct rb_root tree;
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 24555102e198..f24096e27bef 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -53,13 +53,11 @@
* granting userspace undue privileges. There are three categories of privilege.
*
* First, commands which are explicitly defined as privileged or which should
- * only be used by the kernel driver. The parser generally rejects such
- * commands, though it may allow some from the drm master process.
+ * only be used by the kernel driver. The parser rejects such commands
*
* Second, commands which access registers. To support correct/enhanced
* userspace functionality, particularly certain OpenGL extensions, the parser
- * provides a whitelist of registers which userspace may safely access (for both
- * normal and drm master processes).
+ * provides a whitelist of registers which userspace may safely access
*
* Third, commands which access privileged memory (i.e. GGTT, HWS page, etc).
* The parser always rejects such commands.
@@ -84,9 +82,9 @@
* in the per-engine command tables.
*
* Other command table entries map fairly directly to high level categories
- * mentioned above: rejected, master-only, register whitelist. The parser
- * implements a number of checks, including the privileged memory checks, via a
- * general bitmasking mechanism.
+ * mentioned above: rejected, register whitelist. The parser implements a number
+ * of checks, including the privileged memory checks, via a general bitmasking
+ * mechanism.
*/
/*
@@ -104,8 +102,6 @@ struct drm_i915_cmd_descriptor {
* CMD_DESC_REJECT: The command is never allowed
* CMD_DESC_REGISTER: The command should be checked against the
* register whitelist for the appropriate ring
- * CMD_DESC_MASTER: The command is allowed if the submitting process
- * is the DRM master
*/
u32 flags;
#define CMD_DESC_FIXED (1<<0)
@@ -113,7 +109,6 @@ struct drm_i915_cmd_descriptor {
#define CMD_DESC_REJECT (1<<2)
#define CMD_DESC_REGISTER (1<<3)
#define CMD_DESC_BITMASK (1<<4)
-#define CMD_DESC_MASTER (1<<5)
/*
* The command's unique identification bits and the bitmask to get them.
@@ -194,7 +189,7 @@ struct drm_i915_cmd_table {
#define CMD(op, opm, f, lm, fl, ...) \
{ \
.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \
- .cmd = { (op), ~0u << (opm) }, \
+ .cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \
.length = { (lm) }, \
__VA_ARGS__ \
}
@@ -209,14 +204,13 @@ struct drm_i915_cmd_table {
#define R CMD_DESC_REJECT
#define W CMD_DESC_REGISTER
#define B CMD_DESC_BITMASK
-#define M CMD_DESC_MASTER
/* Command Mask Fixed Len Action
---------------------------------------------------------- */
-static const struct drm_i915_cmd_descriptor common_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = {
CMD( MI_NOOP, SMI, F, 1, S ),
CMD( MI_USER_INTERRUPT, SMI, F, 1, R ),
- CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ),
+ CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ),
CMD( MI_ARB_CHECK, SMI, F, 1, S ),
CMD( MI_REPORT_HEAD, SMI, F, 1, S ),
CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
@@ -246,7 +240,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ),
};
-static const struct drm_i915_cmd_descriptor render_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = {
CMD( MI_FLUSH, SMI, F, 1, S ),
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_PREDICATE, SMI, F, 1, S ),
@@ -313,7 +307,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_RS_CONTEXT, SMI, F, 1, S ),
- CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ),
+ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ),
@@ -330,7 +324,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ),
};
-static const struct drm_i915_cmd_descriptor video_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = {
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B,
@@ -374,7 +368,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = {
CMD( MFX_WAIT, SMFX, F, 1, S ),
};
-static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = {
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B,
@@ -412,7 +406,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
}}, ),
};
-static const struct drm_i915_cmd_descriptor blt_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = {
CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B,
.bits = {{
@@ -446,10 +440,64 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
};
static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
- CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ),
+ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
};
+/*
+ * For Gen9 we can still rely on the h/w to enforce cmd security, and only
+ * need to re-enforce the register access checks. We therefore only need to
+ * teach the cmdparser how to find the end of each command, and identify
+ * register accesses. The table doesn't need to reject any commands, and so
+ * the only commands listed here are:
+ * 1) Those that touch registers
+ * 2) Those that do not have the default 8-bit length
+ *
+ * Note that the default MI length mask chosen for this table is 0xFF, not
+ * the 0x3F used on older devices. This is because the vast majority of MI
+ * cmds on Gen9 use a standard 8-bit Length field.
+ * All the Gen9 blitter instructions are standard 0xFF length mask, and
+ * none allow access to non-general registers, so in fact no BLT cmds are
+ * included in the table at all.
+ *
+ */
+static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = {
+ CMD( MI_NOOP, SMI, F, 1, S ),
+ CMD( MI_USER_INTERRUPT, SMI, F, 1, S ),
+ CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ),
+ CMD( MI_FLUSH, SMI, F, 1, S ),
+ CMD( MI_ARB_CHECK, SMI, F, 1, S ),
+ CMD( MI_REPORT_HEAD, SMI, F, 1, S ),
+ CMD( MI_ARB_ON_OFF, SMI, F, 1, S ),
+ CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
+ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ),
+ CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ),
+ CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ),
+ CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
+ CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ),
+ CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC } ),
+ CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ),
+ CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC } ),
+ CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ),
+
+ /*
+ * We allow BB_START but apply further checks. We just sanitize the
+ * basic fields here.
+ */
+#define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0)
+#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1)
+ CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B,
+ .bits = {{
+ .offset = 0,
+ .mask = MI_BB_START_OPERAND_MASK,
+ .expected = MI_BB_START_OPERAND_EXPECT,
+ }}, ),
+};
+
static const struct drm_i915_cmd_descriptor noop_desc =
CMD(MI_NOOP, SMI, F, 1, S);
@@ -463,40 +511,44 @@ static const struct drm_i915_cmd_descriptor noop_desc =
#undef R
#undef W
#undef B
-#undef M
-static const struct drm_i915_cmd_table gen7_render_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { render_cmds, ARRAY_SIZE(render_cmds) },
+static const struct drm_i915_cmd_table gen7_render_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
};
-static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { render_cmds, ARRAY_SIZE(render_cmds) },
+static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
{ hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) },
};
-static const struct drm_i915_cmd_table gen7_video_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { video_cmds, ARRAY_SIZE(video_cmds) },
+static const struct drm_i915_cmd_table gen7_video_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) },
};
-static const struct drm_i915_cmd_table hsw_vebox_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { vecs_cmds, ARRAY_SIZE(vecs_cmds) },
+static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) },
};
-static const struct drm_i915_cmd_table gen7_blt_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { blt_cmds, ARRAY_SIZE(blt_cmds) },
+static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
};
-static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { blt_cmds, ARRAY_SIZE(blt_cmds) },
+static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
{ hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
};
+static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = {
+ { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) },
+};
+
+
/*
* Register whitelists, sorted by increasing register offset.
*/
@@ -612,17 +664,27 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};
-static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
- REG32(FORCEWAKE_MT),
- REG32(DERRMR),
- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)),
- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)),
- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)),
-};
-
-static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
- REG32(FORCEWAKE_MT),
- REG32(DERRMR),
+static const struct drm_i915_reg_descriptor gen9_blt_regs[] = {
+ REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
+ REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
+ REG32(BCS_SWCTRL),
+ REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
+ REG64_IDX(BCS_GPR, 0),
+ REG64_IDX(BCS_GPR, 1),
+ REG64_IDX(BCS_GPR, 2),
+ REG64_IDX(BCS_GPR, 3),
+ REG64_IDX(BCS_GPR, 4),
+ REG64_IDX(BCS_GPR, 5),
+ REG64_IDX(BCS_GPR, 6),
+ REG64_IDX(BCS_GPR, 7),
+ REG64_IDX(BCS_GPR, 8),
+ REG64_IDX(BCS_GPR, 9),
+ REG64_IDX(BCS_GPR, 10),
+ REG64_IDX(BCS_GPR, 11),
+ REG64_IDX(BCS_GPR, 12),
+ REG64_IDX(BCS_GPR, 13),
+ REG64_IDX(BCS_GPR, 14),
+ REG64_IDX(BCS_GPR, 15),
};
#undef REG64
@@ -631,28 +693,27 @@ static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
struct drm_i915_reg_table {
const struct drm_i915_reg_descriptor *regs;
int num_regs;
- bool master;
};
static const struct drm_i915_reg_table ivb_render_reg_tables[] = {
- { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
- { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
+ { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
};
static const struct drm_i915_reg_table ivb_blt_reg_tables[] = {
- { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
- { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
+ { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
};
static const struct drm_i915_reg_table hsw_render_reg_tables[] = {
- { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
- { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false },
- { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
+ { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
+ { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) },
};
static const struct drm_i915_reg_table hsw_blt_reg_tables[] = {
- { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
- { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
+ { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
+};
+
+static const struct drm_i915_reg_table gen9_blt_reg_tables[] = {
+ { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) },
};
static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
@@ -710,6 +771,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
return 0;
}
+static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header)
+{
+ u32 client = cmd_header >> INSTR_CLIENT_SHIFT;
+
+ if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT)
+ return 0xFF;
+
+ DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
+ return 0;
+}
+
static bool validate_cmds_sorted(const struct intel_engine_cs *engine,
const struct drm_i915_cmd_table *cmd_tables,
int cmd_table_count)
@@ -867,18 +939,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
int cmd_table_count;
int ret;
- if (!IS_GEN(engine->i915, 7))
+ if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) &&
+ engine->class == COPY_ENGINE_CLASS))
return;
switch (engine->class) {
case RENDER_CLASS:
if (IS_HASWELL(engine->i915)) {
- cmd_tables = hsw_render_ring_cmds;
+ cmd_tables = hsw_render_ring_cmd_table;
cmd_table_count =
- ARRAY_SIZE(hsw_render_ring_cmds);
+ ARRAY_SIZE(hsw_render_ring_cmd_table);
} else {
- cmd_tables = gen7_render_cmds;
- cmd_table_count = ARRAY_SIZE(gen7_render_cmds);
+ cmd_tables = gen7_render_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table);
}
if (IS_HASWELL(engine->i915)) {
@@ -888,36 +961,46 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
engine->reg_tables = ivb_render_reg_tables;
engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables);
}
-
engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
break;
case VIDEO_DECODE_CLASS:
- cmd_tables = gen7_video_cmds;
- cmd_table_count = ARRAY_SIZE(gen7_video_cmds);
+ cmd_tables = gen7_video_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table);
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
case COPY_ENGINE_CLASS:
- if (IS_HASWELL(engine->i915)) {
- cmd_tables = hsw_blt_ring_cmds;
- cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds);
+ engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
+ if (IS_GEN(engine->i915, 9)) {
+ cmd_tables = gen9_blt_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table);
+ engine->get_cmd_length_mask =
+ gen9_blt_get_cmd_length_mask;
+
+ /* BCS Engine unsafe without parser */
+ engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER;
+ } else if (IS_HASWELL(engine->i915)) {
+ cmd_tables = hsw_blt_ring_cmd_table;
+ cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table);
} else {
- cmd_tables = gen7_blt_cmds;
- cmd_table_count = ARRAY_SIZE(gen7_blt_cmds);
+ cmd_tables = gen7_blt_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table);
}
- if (IS_HASWELL(engine->i915)) {
+ if (IS_GEN(engine->i915, 9)) {
+ engine->reg_tables = gen9_blt_reg_tables;
+ engine->reg_table_count =
+ ARRAY_SIZE(gen9_blt_reg_tables);
+ } else if (IS_HASWELL(engine->i915)) {
engine->reg_tables = hsw_blt_reg_tables;
engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables);
} else {
engine->reg_tables = ivb_blt_reg_tables;
engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables);
}
-
- engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
break;
case VIDEO_ENHANCEMENT_CLASS:
- cmd_tables = hsw_vebox_cmds;
- cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds);
+ cmd_tables = hsw_vebox_cmd_table;
+ cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table);
/* VECS can use the same length_mask function as VCS */
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
@@ -943,7 +1026,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
return;
}
- engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER;
+ engine->flags |= I915_ENGINE_USING_CMD_PARSER;
}
/**
@@ -955,7 +1038,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
*/
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine)
{
- if (!intel_engine_needs_cmd_parser(engine))
+ if (!intel_engine_using_cmd_parser(engine))
return;
fini_hash_table(engine);
@@ -1029,22 +1112,16 @@ __find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
}
static const struct drm_i915_reg_descriptor *
-find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
+find_reg(const struct intel_engine_cs *engine, u32 addr)
{
const struct drm_i915_reg_table *table = engine->reg_tables;
+ const struct drm_i915_reg_descriptor *reg = NULL;
int count = engine->reg_table_count;
- for (; count > 0; ++table, --count) {
- if (!table->master || is_master) {
- const struct drm_i915_reg_descriptor *reg;
+ for (; !reg && (count > 0); ++table, --count)
+ reg = __find_reg(table->regs, table->num_regs, addr);
- reg = __find_reg(table->regs, table->num_regs, addr);
- if (reg != NULL)
- return reg;
- }
- }
-
- return NULL;
+ return reg;
}
/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
@@ -1128,8 +1205,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
static bool check_cmd(const struct intel_engine_cs *engine,
const struct drm_i915_cmd_descriptor *desc,
- const u32 *cmd, u32 length,
- const bool is_master)
+ const u32 *cmd, u32 length)
{
if (desc->flags & CMD_DESC_SKIP)
return true;
@@ -1139,12 +1215,6 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return false;
}
- if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
- DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
- *cmd);
- return false;
- }
-
if (desc->flags & CMD_DESC_REGISTER) {
/*
* Get the distance between individual register offset
@@ -1158,7 +1228,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
offset += step) {
const u32 reg_addr = cmd[offset] & desc->reg.mask;
const struct drm_i915_reg_descriptor *reg =
- find_reg(engine, is_master, reg_addr);
+ find_reg(engine, reg_addr);
if (!reg) {
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
@@ -1236,16 +1306,112 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return true;
}
+static int check_bbstart(const struct i915_gem_context *ctx,
+ u32 *cmd, u32 offset, u32 length,
+ u32 batch_len,
+ u64 batch_start,
+ u64 shadow_batch_start)
+{
+ u64 jump_offset, jump_target;
+ u32 target_cmd_offset, target_cmd_index;
+
+ /* For igt compatibility on older platforms */
+ if (CMDPARSER_USES_GGTT(ctx->i915)) {
+ DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n");
+ return -EACCES;
+ }
+
+ if (length != 3) {
+ DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n",
+ length);
+ return -EINVAL;
+ }
+
+ jump_target = *(u64*)(cmd+1);
+ jump_offset = jump_target - batch_start;
+
+ /*
+ * Any underflow of jump_target is guaranteed to be outside the range
+ * of a u32, so >= test catches both too large and too small
+ */
+ if (jump_offset >= batch_len) {
+ DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n",
+ jump_target);
+ return -EINVAL;
+ }
+
+ /*
+ * This cannot overflow a u32 because we already checked jump_offset
+ * is within the BB, and the batch_len is a u32
+ */
+ target_cmd_offset = lower_32_bits(jump_offset);
+ target_cmd_index = target_cmd_offset / sizeof(u32);
+
+ *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset;
+
+ if (target_cmd_index == offset)
+ return 0;
+
+ if (ctx->jump_whitelist_cmds <= target_cmd_index) {
+ DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n");
+ return -EINVAL;
+ } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) {
+ DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
+ jump_target);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len)
+{
+ const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32));
+ const u32 exact_size = BITS_TO_LONGS(batch_cmds);
+ u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds));
+ unsigned long *next_whitelist;
+
+ if (CMDPARSER_USES_GGTT(ctx->i915))
+ return;
+
+ if (batch_cmds <= ctx->jump_whitelist_cmds) {
+ bitmap_zero(ctx->jump_whitelist, batch_cmds);
+ return;
+ }
+
+again:
+ next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL);
+ if (next_whitelist) {
+ kfree(ctx->jump_whitelist);
+ ctx->jump_whitelist = next_whitelist;
+ ctx->jump_whitelist_cmds =
+ next_size * BITS_PER_BYTE * sizeof(long);
+ return;
+ }
+
+ if (next_size > exact_size) {
+ next_size = exact_size;
+ goto again;
+ }
+
+ DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
+ bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds);
+
+ return;
+}
+
#define LENGTH_BIAS 2
/**
* i915_parse_cmds() - parse a submitted batch buffer for privilege violations
+ * @ctx: the context in which the batch is to execute
* @engine: the engine on which the batch is to execute
* @batch_obj: the batch buffer in question
- * @shadow_batch_obj: copy of the batch buffer in question
+ * @batch_start: Canonical base address of batch
* @batch_start_offset: byte offset in the batch at which execution starts
* @batch_len: length of the commands in batch_obj
- * @is_master: is the submitting process the drm master?
+ * @shadow_batch_obj: copy of the batch buffer in question
+ * @shadow_batch_start: Canonical base address of shadow_batch_obj
*
* Parses the specified batch buffer looking for privilege violations as
* described in the overview.
@@ -1253,14 +1419,17 @@ static bool check_cmd(const struct intel_engine_cs *engine,
* Return: non-zero if the parser finds violations or otherwise fails; -EACCES
* if the batch appears legal but should use hardware parsing
*/
-int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+
+int intel_engine_cmd_parser(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
struct drm_i915_gem_object *batch_obj,
- struct drm_i915_gem_object *shadow_batch_obj,
+ u64 batch_start,
u32 batch_start_offset,
u32 batch_len,
- bool is_master)
+ struct drm_i915_gem_object *shadow_batch_obj,
+ u64 shadow_batch_start)
{
- u32 *cmd, *batch_end;
+ u32 *cmd, *batch_end, offset = 0;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = &default_desc;
bool needs_clflush_after = false;
@@ -1274,6 +1443,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
return PTR_ERR(cmd);
}
+ init_whitelist(ctx, batch_len);
+
/*
* We use the batch length as size because the shadow object is as
* large or larger and copy_batch() will write MI_NOPs to the extra
@@ -1283,31 +1454,15 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
do {
u32 length;
- if (*cmd == MI_BATCH_BUFFER_END) {
- if (needs_clflush_after) {
- void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
- drm_clflush_virt_range(ptr,
- (void *)(cmd + 1) - ptr);
- }
+ if (*cmd == MI_BATCH_BUFFER_END)
break;
- }
desc = find_cmd(engine, *cmd, desc, &default_desc);
if (!desc) {
DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
*cmd);
ret = -EINVAL;
- break;
- }
-
- /*
- * If the batch buffer contains a chained batch, return an
- * error that tells the caller to abort and dispatch the
- * workload as a non-secure batch.
- */
- if (desc->cmd.value == MI_BATCH_BUFFER_START) {
- ret = -EACCES;
- break;
+ goto err;
}
if (desc->flags & CMD_DESC_FIXED)
@@ -1321,22 +1476,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
length,
batch_end - cmd);
ret = -EINVAL;
- break;
+ goto err;
}
- if (!check_cmd(engine, desc, cmd, length, is_master)) {
+ if (!check_cmd(engine, desc, cmd, length)) {
ret = -EACCES;
+ goto err;
+ }
+
+ if (desc->cmd.value == MI_BATCH_BUFFER_START) {
+ ret = check_bbstart(ctx, cmd, offset, length,
+ batch_len, batch_start,
+ shadow_batch_start);
+
+ if (ret)
+ goto err;
break;
}
+ if (ctx->jump_whitelist_cmds > offset)
+ set_bit(offset, ctx->jump_whitelist);
+
cmd += length;
+ offset += length;
if (cmd >= batch_end) {
DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
ret = -EINVAL;
- break;
+ goto err;
}
} while (1);
+ if (needs_clflush_after) {
+ void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
+
+ drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
+ }
+
+err:
i915_gem_object_unpin_map(shadow_batch_obj);
return ret;
}
@@ -1357,7 +1533,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
/* If the command parser is not enabled, report 0 - unsupported */
for_each_uabi_engine(engine, dev_priv) {
- if (intel_engine_needs_cmd_parser(engine)) {
+ if (intel_engine_using_cmd_parser(engine)) {
active = true;
break;
}
@@ -1382,6 +1558,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
* the parser enabled.
* 9. Don't whitelist or handle oacontrol specially, as ownership
* for oacontrol state is moving to i915-perf.
+ * 10. Support for Gen9 BCS Parsing
*/
- return 9;
+ return 10;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ccb5b566795f..87e05ca3646a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -63,6 +63,7 @@
#include "gem/i915_gem_ioctls.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
+#include "gt/intel_rc6.h"
#include "i915_debugfs.h"
#include "i915_drv.h"
@@ -1819,6 +1820,8 @@ static int i915_drm_resume(struct drm_device *dev)
disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
+ intel_rc6_ctx_wa_resume(&dev_priv->gt.rc6);
+
intel_gt_sanitize(&dev_priv->gt, true);
ret = i915_ggtt_enable_hw(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0567fe67e8c5..e29bc137e7ba 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1614,9 +1614,16 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define VEBOX_MASK(dev_priv) \
ENGINE_INSTANCES_MASK(dev_priv, VECS0, I915_MAX_VECS)
+/*
+ * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution
+ * All later gens can run the final buffer from the ppgtt
+ */
+#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN(dev_priv, 7)
+
#define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc)
#define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop)
#define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb)
+#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6)
#define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \
IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
@@ -1649,10 +1656,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
/* Early gen2 have a totally busted CS tlb and require pinned batches. */
#define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv))
+#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \
+ (IS_BROADWELL(dev_priv) || IS_GEN(dev_priv, 9))
+
/* WaRsDisableCoarsePowerGating:skl,cnl */
#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \
- (IS_CANNONLAKE(dev_priv) || \
- IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv))
+ (IS_CANNONLAKE(dev_priv) || IS_GEN(dev_priv, 9))
#define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4)
#define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \
@@ -1834,6 +1843,14 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
unsigned long flags);
#define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0)
+struct i915_vma * __must_check
+i915_gem_object_pin(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view,
+ u64 size,
+ u64 alignment,
+ u64 flags);
+
void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
static inline int __must_check
@@ -1939,12 +1956,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
void intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
-int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+int intel_engine_cmd_parser(struct i915_gem_context *cxt,
+ struct intel_engine_cs *engine,
struct drm_i915_gem_object *batch_obj,
- struct drm_i915_gem_object *shadow_batch_obj,
+ u64 user_batch_start,
u32 batch_start_offset,
u32 batch_len,
- bool is_master);
+ struct drm_i915_gem_object *shadow_batch_obj,
+ u64 shadow_batch_start);
/* intel_device_info.c */
static inline struct intel_device_info *
@@ -2026,4 +2045,10 @@ i915_coherent_map_type(struct drm_i915_private *i915)
return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
}
+static inline bool intel_guc_submission_is_enabled(struct intel_guc *guc)
+{
+ return intel_guc_is_submission_supported(guc) &&
+ intel_guc_is_running(guc);
+}
+
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ac23322fb382..b9eb6b3149b7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -893,6 +893,20 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_address_space *vm = &dev_priv->ggtt.vm;
+
+ return i915_gem_object_pin(obj, vm, view, size, alignment,
+ flags | PIN_GLOBAL);
+}
+
+struct i915_vma *
+i915_gem_object_pin(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view,
+ u64 size,
+ u64 alignment,
+ u64 flags)
+{
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_vma *vma;
int ret;
@@ -958,7 +972,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
return ERR_PTR(ret);
}
- ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+ ret = i915_vma_pin(vma, size, alignment, flags);
if (ret)
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 88179202c556..6239a9adbf14 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2609,8 +2609,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
- ppgtt->vm.total = ggtt->vm.total;
-
return 0;
err_ppgtt:
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index ad33fbe90a28..cf8a8c3ef047 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -63,7 +63,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = !!(i915->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES);
break;
case I915_PARAM_HAS_SECURE_BATCHES:
- value = capable(CAP_SYS_ADMIN);
+ value = HAS_SECURE_BATCHES(i915) && capable(CAP_SYS_ADMIN);
break;
case I915_PARAM_CMD_PARSER_VERSION:
value = i915_cmd_parser_get_version(i915);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index e8b67f5e521d..3c85cb0ee99f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1029,9 +1029,9 @@ i915_error_object_create(struct drm_i915_private *i915,
for_each_sgt_daddr(dma, iter, vma->pages) {
void __iomem *s;
- s = io_mapping_map_atomic_wc(&mem->iomap, dma);
+ s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE);
ret = compress_page(compress, (void __force *)s, dst);
- io_mapping_unmap_atomic(s);
+ io_mapping_unmap(s);
if (ret)
break;
}
@@ -1043,9 +1043,9 @@ i915_error_object_create(struct drm_i915_private *i915,
drm_clflush_pages(&page, 1);
- s = kmap_atomic(page);
+ s = kmap(page);
ret = compress_page(compress, s, dst);
- kunmap_atomic(s);
+ kunmap(s);
drm_clflush_pages(&page, 1);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index a8c2318d3d5e..65d7c2e599de 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1870,7 +1870,7 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
config_length += num_lri_dwords(oa_config->mux_regs_len);
config_length += num_lri_dwords(oa_config->b_counter_regs_len);
config_length += num_lri_dwords(oa_config->flex_regs_len);
- config_length++; /* MI_BATCH_BUFFER_END */
+ config_length += 3; /* MI_BATCH_BUFFER_START */
config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
@@ -1895,7 +1895,12 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
oa_config->flex_regs,
oa_config->flex_regs_len);
- *cs++ = MI_BATCH_BUFFER_END;
+ /* Jump into the active wait. */
+ *cs++ = (INTEL_GEN(stream->perf->i915) < 8 ?
+ MI_BATCH_BUFFER_START :
+ MI_BATCH_BUFFER_START_GEN8);
+ *cs++ = i915_ggtt_offset(stream->noa_wait);
+ *cs++ = 0;
i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
@@ -3307,15 +3312,6 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
}
}
- if (props->hold_preemption) {
- if (!props->single_context) {
- DRM_DEBUG("preemption disable with no context\n");
- ret = -EINVAL;
- goto err;
- }
- privileged_op = true;
- }
-
/*
* On Haswell the OA unit supports clock gating off for a specific
* context and in this mode there's no visibility of metrics for the
@@ -3335,12 +3331,21 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
* doesn't request global stream access (i.e. query based sampling
* using MI_RECORD_PERF_COUNT.
*/
- if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption)
+ if (IS_HASWELL(perf->i915) && specific_ctx)
privileged_op = false;
else if (IS_GEN(perf->i915, 12) && specific_ctx &&
(props->sample_flags & SAMPLE_OA_REPORT) == 0)
privileged_op = false;
+ if (props->hold_preemption) {
+ if (!props->single_context) {
+ DRM_DEBUG("preemption disable with no context\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ privileged_op = true;
+ }
+
/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
* we check a dev.i915.perf_stream_paranoid sysctl option
* to determine if it's ok to access system wide OA counters
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 05395015d1f2..2814218c5ba1 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -190,7 +190,7 @@ static u64 get_rc6(struct intel_gt *gt)
val = 0;
if (intel_gt_pm_get_if_awake(gt)) {
val = __get_rc6(gt);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put_async(gt);
}
spin_lock_irqsave(&pmu->lock, flags);
@@ -343,7 +343,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
skip:
spin_unlock_irqrestore(&engine->uncore->lock, flags);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
}
}
@@ -368,7 +368,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
if (intel_gt_pm_get_if_awake(gt)) {
val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1);
val = intel_get_cagf(rps, val);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put_async(gt);
}
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
@@ -878,8 +878,8 @@ create_event_attributes(struct i915_pmu *pmu)
const char *name;
const char *unit;
} events[] = {
- __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
- __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
+ __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
+ __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
};
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index c27cfef9281c..ef25ce6e395e 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -103,15 +103,18 @@ query_engine_info(struct drm_i915_private *i915,
struct drm_i915_engine_info __user *info_ptr;
struct drm_i915_query_engine_info query;
struct drm_i915_engine_info info = { };
+ unsigned int num_uabi_engines = 0;
struct intel_engine_cs *engine;
int len, ret;
if (query_item->flags)
return -EINVAL;
+ for_each_uabi_engine(engine, i915)
+ num_uabi_engines++;
+
len = sizeof(struct drm_i915_query_engine_info) +
- RUNTIME_INFO(i915)->num_engines *
- sizeof(struct drm_i915_engine_info);
+ num_uabi_engines * sizeof(struct drm_i915_engine_info);
ret = copy_query_item(&query, sizeof(query), len, query_item);
if (ret != 0)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 53c280c4e741..73079b503724 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -474,6 +474,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define ECOCHK_PPGTT_WT_HSW (0x2 << 3)
#define ECOCHK_PPGTT_WB_HSW (0x3 << 3)
+#define GEN8_RC6_CTX_INFO _MMIO(0x8504)
+
#define GAC_ECO_BITS _MMIO(0x14090)
#define ECOBITS_SNB_BIT (1 << 13)
#define ECOBITS_PPGTT_CACHE64B (3 << 8)
@@ -560,6 +562,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
*/
#define BCS_SWCTRL _MMIO(0x22200)
+/* There are 16 GPR registers */
+#define BCS_GPR(n) _MMIO(0x22600 + (n) * 8)
+#define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4)
+
#define GPGPU_THREADS_DISPATCHED _MMIO(0x2290)
#define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4)
#define HS_INVOCATION_COUNT _MMIO(0x2300)
@@ -7353,6 +7359,10 @@ enum {
#define DMC_DEBUG3 _MMIO(0x101090)
+/* Display Internal Timeout Register */
+#define RM_TIMEOUT _MMIO(0x42060)
+#define MMIO_TIMEOUT_US(us) ((us) << 0)
+
/* interrupts */
#define DE_MASTER_IRQ_CONTROL (1 << 31)
#define DE_SPRITEB_FLIP_DONE (1 << 29)
@@ -9661,7 +9671,7 @@ enum skl_power_gate {
#define TRANS_DDI_EDP_INPUT_A_ONOFF (4 << 12)
#define TRANS_DDI_EDP_INPUT_B_ONOFF (5 << 12)
#define TRANS_DDI_EDP_INPUT_C_ONOFF (6 << 12)
-#define TRANS_DDI_MST_TRANSPORT_SELECT_MASK REG_GENMASK(12, 10)
+#define TRANS_DDI_MST_TRANSPORT_SELECT_MASK REG_GENMASK(11, 10)
#define TRANS_DDI_MST_TRANSPORT_SELECT(trans) \
REG_FIELD_PREP(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, trans)
#define TRANS_DDI_HDCP_SIGNALLING (1 << 9)
diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c
index 000ba43e2c02..8fd92b9130a7 100644
--- a/drivers/gpu/drm/i915/intel_pch.c
+++ b/drivers/gpu/drm/i915/intel_pch.c
@@ -62,7 +62,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
/* KBP is SPT compatible */
return PCH_SPT;
case INTEL_PCH_CNP_DEVICE_ID_TYPE:
- case INTEL_PCH_CNP2_DEVICE_ID_TYPE:
DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n");
WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv));
return PCH_CNP;
@@ -76,6 +75,11 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
WARN_ON(!IS_COFFEELAKE(dev_priv));
/* CometPoint is CNP Compatible */
return PCH_CNP;
+ case INTEL_PCH_CMP_V_DEVICE_ID_TYPE:
+ DRM_DEBUG_KMS("Found Comet Lake V PCH (CMP-V)\n");
+ WARN_ON(!IS_COFFEELAKE(dev_priv));
+ /* Comet Lake V PCH is based on KBP, which is SPT compatible */
+ return PCH_SPT;
case INTEL_PCH_ICP_DEVICE_ID_TYPE:
DRM_DEBUG_KMS("Found Ice Lake PCH\n");
WARN_ON(!IS_ICELAKE(dev_priv));
diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h
index 1115c6a0522c..d26c25dd8d54 100644
--- a/drivers/gpu/drm/i915/intel_pch.h
+++ b/drivers/gpu/drm/i915/intel_pch.h
@@ -40,10 +40,10 @@ enum intel_pch {
#define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00
#define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA280
#define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300
-#define INTEL_PCH_CNP2_DEVICE_ID_TYPE 0xA380
#define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80
#define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280
#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680
+#define INTEL_PCH_CMP_V_DEVICE_ID_TYPE 0xA380
#define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480
#define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00
#define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 5d2b460d3ee5..809bff955b5a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -107,6 +107,14 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
*/
I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
PWM1_GATING_DIS | PWM2_GATING_DIS);
+
+ /*
+ * Lower the display internal timeout.
+ * This is needed to avoid any hard hangs when DSI port PLL
+ * is off and a MMIO access is attempted by any privilege
+ * application, using batch buffers or any other means.
+ */
+ I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950));
}
static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index 868cc78048d0..59aa1b6f1827 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -54,7 +54,8 @@ int __intel_wakeref_get_first(struct intel_wakeref *wf)
static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
{
- if (!atomic_dec_and_test(&wf->count))
+ INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
+ if (unlikely(!atomic_dec_and_test(&wf->count)))
goto unlock;
/* ops->put() must reschedule its own release on error/deferral */
@@ -67,13 +68,12 @@ unlock:
mutex_unlock(&wf->mutex);
}
-void __intel_wakeref_put_last(struct intel_wakeref *wf)
+void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
{
INTEL_WAKEREF_BUG_ON(work_pending(&wf->work));
/* Assume we are not in process context and so cannot sleep. */
- if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC ||
- !mutex_trylock(&wf->mutex)) {
+ if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
schedule_work(&wf->work);
return;
}
@@ -109,8 +109,17 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
{
- return wait_var_event_killable(&wf->wakeref,
- !intel_wakeref_is_active(wf));
+ int err;
+
+ might_sleep();
+
+ err = wait_var_event_killable(&wf->wakeref,
+ !intel_wakeref_is_active(wf));
+ if (err)
+ return err;
+
+ intel_wakeref_unlock_wait(wf);
+ return 0;
}
static void wakeref_auto_timeout(struct timer_list *t)
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 5f0c972a80fb..da6e8fd506e6 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/bits.h>
+#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/refcount.h>
#include <linux/stackdepot.h>
@@ -29,9 +30,6 @@ typedef depot_stack_handle_t intel_wakeref_t;
struct intel_wakeref_ops {
int (*get)(struct intel_wakeref *wf);
int (*put)(struct intel_wakeref *wf);
-
- unsigned long flags;
-#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
};
struct intel_wakeref {
@@ -57,7 +55,7 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
} while (0)
int __intel_wakeref_get_first(struct intel_wakeref *wf);
-void __intel_wakeref_put_last(struct intel_wakeref *wf);
+void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags);
/**
* intel_wakeref_get: Acquire the wakeref
@@ -100,10 +98,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
}
/**
- * intel_wakeref_put: Release the wakeref
- * @i915: the drm_i915_private device
+ * intel_wakeref_put_flags: Release the wakeref
* @wf: the wakeref
- * @fn: callback for releasing the wakeref, called only on final release.
+ * @flags: control flags
*
* Release our hold on the wakeref. When there are no more users,
* the runtime pm wakeref will be released after the @fn callback is called
@@ -116,11 +113,25 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
* code otherwise.
*/
static inline void
-intel_wakeref_put(struct intel_wakeref *wf)
+__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags)
+#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
{
INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
if (unlikely(!atomic_add_unless(&wf->count, -1, 1)))
- __intel_wakeref_put_last(wf);
+ __intel_wakeref_put_last(wf, flags);
+}
+
+static inline void
+intel_wakeref_put(struct intel_wakeref *wf)
+{
+ might_sleep();
+ __intel_wakeref_put(wf, 0);
+}
+
+static inline void
+intel_wakeref_put_async(struct intel_wakeref *wf)
+{
+ __intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC);
}
/**
@@ -152,6 +163,21 @@ intel_wakeref_unlock(struct intel_wakeref *wf)
}
/**
+ * intel_wakeref_unlock_wait: Wait until the active callback is complete
+ * @wf: the wakeref
+ *
+ * Waits for the active callback (under the @wf->mutex or another CPU) is
+ * complete.
+ */
+static inline void
+intel_wakeref_unlock_wait(struct intel_wakeref *wf)
+{
+ mutex_lock(&wf->mutex);
+ mutex_unlock(&wf->mutex);
+ flush_work(&wf->work);
+}
+
+/**
* intel_wakeref_is_active: Query whether the wakeref is currently held
* @wf: the wakeref
*
@@ -170,6 +196,7 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
static inline void
__intel_wakeref_defer_park(struct intel_wakeref *wf)
{
+ lockdep_assert_held(&wf->mutex);
INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
atomic_set_release(&wf->count, 1);
}
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
index 14878ebf59d7..4a55bb6e2213 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
@@ -3,6 +3,8 @@
* Copyright (c) 2015 MediaTek Inc.
*/
+#include <drm/drm_fourcc.h>
+
#include <linux/clk.h>
#include <linux/component.h>
#include <linux/module.h>
@@ -50,6 +52,8 @@
OVL_CON_CLRFMT_RGB : 0)
#define OVL_CON_AEN BIT(8)
#define OVL_CON_ALPHA 0xff
+#define OVL_CON_VIRT_FLIP BIT(9)
+#define OVL_CON_HORZ_FLIP BIT(10)
struct mtk_disp_ovl_data {
unsigned int addr;
@@ -137,6 +141,40 @@ static unsigned int mtk_ovl_layer_nr(struct mtk_ddp_comp *comp)
return ovl->data->layer_nr;
}
+static unsigned int mtk_ovl_supported_rotations(struct mtk_ddp_comp *comp)
+{
+ return DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180 |
+ DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y;
+}
+
+static int mtk_ovl_layer_check(struct mtk_ddp_comp *comp, unsigned int idx,
+ struct mtk_plane_state *mtk_state)
+{
+ struct drm_plane_state *state = &mtk_state->base;
+ unsigned int rotation = 0;
+
+ rotation = drm_rotation_simplify(state->rotation,
+ DRM_MODE_ROTATE_0 |
+ DRM_MODE_REFLECT_X |
+ DRM_MODE_REFLECT_Y);
+ rotation &= ~DRM_MODE_ROTATE_0;
+
+ /* We can only do reflection, not rotation */
+ if ((rotation & DRM_MODE_ROTATE_MASK) != 0)
+ return -EINVAL;
+
+ /*
+ * TODO: Rotating/reflecting YUV buffers is not supported at this time.
+ * Only RGB[AX] variants are supported.
+ */
+ if (state->fb->format->is_yuv && rotation != 0)
+ return -EINVAL;
+
+ state->rotation = rotation;
+
+ return 0;
+}
+
static void mtk_ovl_layer_on(struct mtk_ddp_comp *comp, unsigned int idx)
{
unsigned int reg;
@@ -229,6 +267,16 @@ static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx,
if (idx != 0)
con |= OVL_CON_AEN | OVL_CON_ALPHA;
+ if (pending->rotation & DRM_MODE_REFLECT_Y) {
+ con |= OVL_CON_VIRT_FLIP;
+ addr += (pending->height - 1) * pending->pitch;
+ }
+
+ if (pending->rotation & DRM_MODE_REFLECT_X) {
+ con |= OVL_CON_HORZ_FLIP;
+ addr += pending->pitch - 1;
+ }
+
writel_relaxed(con, comp->regs + DISP_REG_OVL_CON(idx));
writel_relaxed(pitch, comp->regs + DISP_REG_OVL_PITCH(idx));
writel_relaxed(src_size, comp->regs + DISP_REG_OVL_SRC_SIZE(idx));
@@ -263,9 +311,11 @@ static const struct mtk_ddp_comp_funcs mtk_disp_ovl_funcs = {
.stop = mtk_ovl_stop,
.enable_vblank = mtk_ovl_enable_vblank,
.disable_vblank = mtk_ovl_disable_vblank,
+ .supported_rotations = mtk_ovl_supported_rotations,
.layer_nr = mtk_ovl_layer_nr,
.layer_on = mtk_ovl_layer_on,
.layer_off = mtk_ovl_layer_off,
+ .layer_check = mtk_ovl_layer_check,
.layer_config = mtk_ovl_layer_config,
.bgclr_in_on = mtk_ovl_bgclr_in_on,
.bgclr_in_off = mtk_ovl_bgclr_in_off,
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index b841d3706d8b..f80a8ba75977 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -207,6 +207,28 @@ static void mtk_crtc_ddp_clk_disable(struct mtk_drm_crtc *mtk_crtc)
clk_disable_unprepare(mtk_crtc->ddp_comp[i]->clk);
}
+static
+struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc,
+ struct drm_plane *plane,
+ unsigned int *local_layer)
+{
+ struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
+ struct mtk_ddp_comp *comp;
+ int i, count = 0;
+
+ for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) {
+ comp = mtk_crtc->ddp_comp[i];
+ if (plane->index < (count + mtk_ddp_comp_layer_nr(comp))) {
+ *local_layer = plane->index - count;
+ return comp;
+ }
+ count += mtk_ddp_comp_layer_nr(comp);
+ }
+
+ WARN(1, "Failed to find component for plane %d\n", plane->index);
+ return NULL;
+}
+
static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc)
{
struct drm_crtc *crtc = &mtk_crtc->base;
@@ -283,19 +305,12 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc)
for (i = 0; i < mtk_crtc->layer_nr; i++) {
struct drm_plane *plane = &mtk_crtc->planes[i];
struct mtk_plane_state *plane_state;
- struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0];
- unsigned int comp_layer_nr = mtk_ddp_comp_layer_nr(comp);
+ struct mtk_ddp_comp *comp;
unsigned int local_layer;
plane_state = to_mtk_plane_state(plane->state);
-
- if (i >= comp_layer_nr) {
- comp = mtk_crtc->ddp_comp[1];
- local_layer = i - comp_layer_nr;
- } else
- local_layer = i;
- mtk_ddp_comp_layer_config(comp, local_layer,
- plane_state);
+ comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer);
+ mtk_ddp_comp_layer_config(comp, local_layer, plane_state);
}
return 0;
@@ -343,7 +358,6 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc)
struct mtk_crtc_state *state = to_mtk_crtc_state(mtk_crtc->base.state);
struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0];
unsigned int i;
- unsigned int comp_layer_nr = mtk_ddp_comp_layer_nr(comp);
unsigned int local_layer;
/*
@@ -366,22 +380,30 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc)
plane_state = to_mtk_plane_state(plane->state);
- if (plane_state->pending.config) {
- if (i >= comp_layer_nr) {
- comp = mtk_crtc->ddp_comp[1];
- local_layer = i - comp_layer_nr;
- } else
- local_layer = i;
-
- mtk_ddp_comp_layer_config(comp, local_layer,
- plane_state);
- plane_state->pending.config = false;
- }
+ if (!plane_state->pending.config)
+ continue;
+
+ comp = mtk_drm_ddp_comp_for_plane(crtc, plane,
+ &local_layer);
+
+ mtk_ddp_comp_layer_config(comp, local_layer,
+ plane_state);
+ plane_state->pending.config = false;
}
mtk_crtc->pending_planes = false;
}
}
+int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane,
+ struct mtk_plane_state *state)
+{
+ unsigned int local_layer;
+ struct mtk_ddp_comp *comp;
+
+ comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer);
+ return mtk_ddp_comp_layer_check(comp, local_layer, state);
+}
+
static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc,
struct drm_crtc_state *old_state)
{
@@ -543,14 +565,65 @@ void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp)
mtk_drm_finish_page_flip(mtk_crtc);
}
+static int mtk_drm_crtc_num_comp_planes(struct mtk_drm_crtc *mtk_crtc,
+ int comp_idx)
+{
+ struct mtk_ddp_comp *comp;
+
+ if (comp_idx > 1)
+ return 0;
+
+ comp = mtk_crtc->ddp_comp[comp_idx];
+ if (!comp->funcs)
+ return 0;
+
+ if (comp_idx == 1 && !comp->funcs->bgclr_in_on)
+ return 0;
+
+ return mtk_ddp_comp_layer_nr(comp);
+}
+
+static inline
+enum drm_plane_type mtk_drm_crtc_plane_type(unsigned int plane_idx)
+{
+ if (plane_idx == 0)
+ return DRM_PLANE_TYPE_PRIMARY;
+ else if (plane_idx == 1)
+ return DRM_PLANE_TYPE_CURSOR;
+ else
+ return DRM_PLANE_TYPE_OVERLAY;
+
+}
+
+static int mtk_drm_crtc_init_comp_planes(struct drm_device *drm_dev,
+ struct mtk_drm_crtc *mtk_crtc,
+ int comp_idx, int pipe)
+{
+ int num_planes = mtk_drm_crtc_num_comp_planes(mtk_crtc, comp_idx);
+ struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[comp_idx];
+ int i, ret;
+
+ for (i = 0; i < num_planes; i++) {
+ ret = mtk_plane_init(drm_dev,
+ &mtk_crtc->planes[mtk_crtc->layer_nr],
+ BIT(pipe),
+ mtk_drm_crtc_plane_type(mtk_crtc->layer_nr),
+ mtk_ddp_comp_supported_rotations(comp));
+ if (ret)
+ return ret;
+
+ mtk_crtc->layer_nr++;
+ }
+ return 0;
+}
+
int mtk_drm_crtc_create(struct drm_device *drm_dev,
const enum mtk_ddp_comp_id *path, unsigned int path_len)
{
struct mtk_drm_private *priv = drm_dev->dev_private;
struct device *dev = drm_dev->dev;
struct mtk_drm_crtc *mtk_crtc;
- enum drm_plane_type type;
- unsigned int zpos;
+ unsigned int num_comp_planes = 0;
int pipe = priv->num_pipes;
int ret;
int i;
@@ -606,23 +679,15 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
mtk_crtc->ddp_comp[i] = comp;
}
- mtk_crtc->layer_nr = mtk_ddp_comp_layer_nr(mtk_crtc->ddp_comp[0]);
- if (mtk_crtc->ddp_comp_nr > 1) {
- struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[1];
+ for (i = 0; i < mtk_crtc->ddp_comp_nr; i++)
+ num_comp_planes += mtk_drm_crtc_num_comp_planes(mtk_crtc, i);
- if (comp->funcs->bgclr_in_on)
- mtk_crtc->layer_nr += mtk_ddp_comp_layer_nr(comp);
- }
- mtk_crtc->planes = devm_kcalloc(dev, mtk_crtc->layer_nr,
- sizeof(struct drm_plane),
- GFP_KERNEL);
-
- for (zpos = 0; zpos < mtk_crtc->layer_nr; zpos++) {
- type = (zpos == 0) ? DRM_PLANE_TYPE_PRIMARY :
- (zpos == 1) ? DRM_PLANE_TYPE_CURSOR :
- DRM_PLANE_TYPE_OVERLAY;
- ret = mtk_plane_init(drm_dev, &mtk_crtc->planes[zpos],
- BIT(pipe), type);
+ mtk_crtc->planes = devm_kcalloc(dev, num_comp_planes,
+ sizeof(struct drm_plane), GFP_KERNEL);
+
+ for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) {
+ ret = mtk_drm_crtc_init_comp_planes(drm_dev, mtk_crtc, i,
+ pipe);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h
index fcc134eb00c9..6afe1c19557a 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h
@@ -19,5 +19,7 @@ void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp);
int mtk_drm_crtc_create(struct drm_device *drm_dev,
const enum mtk_ddp_comp_id *path,
unsigned int path_len);
+int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane,
+ struct mtk_plane_state *state);
#endif /* MTK_DRM_CRTC_H */
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
index 26441f4d1ad3..2f1e9e75b8da 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
@@ -77,9 +77,13 @@ struct mtk_ddp_comp_funcs {
void (*stop)(struct mtk_ddp_comp *comp);
void (*enable_vblank)(struct mtk_ddp_comp *comp, struct drm_crtc *crtc);
void (*disable_vblank)(struct mtk_ddp_comp *comp);
+ unsigned int (*supported_rotations)(struct mtk_ddp_comp *comp);
unsigned int (*layer_nr)(struct mtk_ddp_comp *comp);
void (*layer_on)(struct mtk_ddp_comp *comp, unsigned int idx);
void (*layer_off)(struct mtk_ddp_comp *comp, unsigned int idx);
+ int (*layer_check)(struct mtk_ddp_comp *comp,
+ unsigned int idx,
+ struct mtk_plane_state *state);
void (*layer_config)(struct mtk_ddp_comp *comp, unsigned int idx,
struct mtk_plane_state *state);
void (*gamma_set)(struct mtk_ddp_comp *comp,
@@ -130,6 +134,15 @@ static inline void mtk_ddp_comp_disable_vblank(struct mtk_ddp_comp *comp)
comp->funcs->disable_vblank(comp);
}
+static inline
+unsigned int mtk_ddp_comp_supported_rotations(struct mtk_ddp_comp *comp)
+{
+ if (comp->funcs && comp->funcs->supported_rotations)
+ return comp->funcs->supported_rotations(comp);
+
+ return 0;
+}
+
static inline unsigned int mtk_ddp_comp_layer_nr(struct mtk_ddp_comp *comp)
{
if (comp->funcs && comp->funcs->layer_nr)
@@ -152,6 +165,15 @@ static inline void mtk_ddp_comp_layer_off(struct mtk_ddp_comp *comp,
comp->funcs->layer_off(comp, idx);
}
+static inline int mtk_ddp_comp_layer_check(struct mtk_ddp_comp *comp,
+ unsigned int idx,
+ struct mtk_plane_state *state)
+{
+ if (comp->funcs && comp->funcs->layer_check)
+ return comp->funcs->layer_check(comp, idx, state);
+ return 0;
+}
+
static inline void mtk_ddp_comp_layer_config(struct mtk_ddp_comp *comp,
unsigned int idx,
struct mtk_plane_state *state)
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
index 584a9ecadce6..3b0cc91c7023 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
@@ -20,6 +20,12 @@
static const u32 formats[] = {
DRM_FORMAT_XRGB8888,
DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_BGRX8888,
+ DRM_FORMAT_BGRA8888,
+ DRM_FORMAT_ABGR8888,
+ DRM_FORMAT_XBGR8888,
+ DRM_FORMAT_RGB888,
+ DRM_FORMAT_BGR888,
DRM_FORMAT_RGB565,
DRM_FORMAT_UYVY,
DRM_FORMAT_YUYV,
@@ -84,6 +90,7 @@ static int mtk_plane_atomic_check(struct drm_plane *plane,
{
struct drm_framebuffer *fb = state->fb;
struct drm_crtc_state *crtc_state;
+ int ret;
if (!fb)
return 0;
@@ -91,6 +98,11 @@ static int mtk_plane_atomic_check(struct drm_plane *plane,
if (!state->crtc)
return 0;
+ ret = mtk_drm_crtc_plane_check(state->crtc, plane,
+ to_mtk_plane_state(state));
+ if (ret)
+ return ret;
+
crtc_state = drm_atomic_get_crtc_state(state->state, state->crtc);
if (IS_ERR(crtc_state))
return PTR_ERR(crtc_state);
@@ -132,6 +144,7 @@ static void mtk_plane_atomic_update(struct drm_plane *plane,
state->pending.y = plane->state->dst.y1;
state->pending.width = drm_rect_width(&plane->state->dst);
state->pending.height = drm_rect_height(&plane->state->dst);
+ state->pending.rotation = plane->state->rotation;
wmb(); /* Make sure the above parameters are set before update */
state->pending.dirty = true;
}
@@ -154,7 +167,8 @@ static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = {
};
int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane,
- unsigned long possible_crtcs, enum drm_plane_type type)
+ unsigned long possible_crtcs, enum drm_plane_type type,
+ unsigned int supported_rotations)
{
int err;
@@ -166,6 +180,14 @@ int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane,
return err;
}
+ if (supported_rotations & ~DRM_MODE_ROTATE_0) {
+ err = drm_plane_create_rotation_property(plane,
+ DRM_MODE_ROTATE_0,
+ supported_rotations);
+ if (err)
+ DRM_INFO("Create rotation property failed\n");
+ }
+
drm_plane_helper_add(plane, &mtk_plane_helper_funcs);
return 0;
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.h b/drivers/gpu/drm/mediatek/mtk_drm_plane.h
index 6f842df722c7..760885e35b27 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_plane.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.h
@@ -20,6 +20,7 @@ struct mtk_plane_pending_state {
unsigned int y;
unsigned int width;
unsigned int height;
+ unsigned int rotation;
bool dirty;
};
@@ -35,6 +36,7 @@ to_mtk_plane_state(struct drm_plane_state *state)
}
int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane,
- unsigned long possible_crtcs, enum drm_plane_type type);
+ unsigned long possible_crtcs, enum drm_plane_type type,
+ unsigned int supported_rotations);
#endif
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index e686331fa089..691c1a277d91 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -352,26 +352,26 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
cxdbg = ioremap(res->start, resource_size(res));
if (cxdbg) {
- cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
+ cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
- cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
+ cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
- cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
- cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
- cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
- cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
+ cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
+ cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
+ cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
+ cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
- cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0,
+ cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
0x76543210);
- cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1,
+ cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
0xFEDCBA98);
- cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
- cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
- cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
- cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
+ cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
+ cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
+ cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
+ cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
}
a6xx_state->debugbus = state_kcalloc(a6xx_state,
diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
index 6be879578140..1c74381a4fc9 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -47,12 +47,8 @@ static int msm_gpu_release(struct inode *inode, struct file *file)
struct msm_gpu_show_priv *show_priv = m->private;
struct msm_drm_private *priv = show_priv->dev->dev_private;
struct msm_gpu *gpu = priv->gpu;
- int ret;
-
- ret = mutex_lock_interruptible(&show_priv->dev->struct_mutex);
- if (ret)
- return ret;
+ mutex_lock(&show_priv->dev->struct_mutex);
gpu->funcs->gpu_state_put(show_priv->state);
mutex_unlock(&show_priv->dev->struct_mutex);
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 9d086133a84e..9458dc6c750c 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -556,11 +556,11 @@ static int panfrost_probe(struct platform_device *pdev)
return 0;
err_out2:
+ pm_runtime_disable(pfdev->dev);
panfrost_devfreq_fini(pfdev);
err_out1:
panfrost_device_fini(pfdev);
err_out0:
- pm_runtime_disable(pfdev->dev);
drm_dev_put(ddev);
return err;
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index bdd990568476..a3ed64a1f15e 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -224,9 +224,9 @@ static size_t get_pgsize(u64 addr, size_t size)
return SZ_2M;
}
-void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
- struct panfrost_mmu *mmu,
- u64 iova, size_t size)
+static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
+ struct panfrost_mmu *mmu,
+ u64 iova, size_t size)
{
if (mmu->as < 0)
return;
@@ -406,11 +406,11 @@ addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr)
spin_lock(&pfdev->as_lock);
list_for_each_entry(mmu, &pfdev->as_lru_list, list) {
if (as == mmu->as)
- break;
+ goto found_mmu;
}
- if (as != mmu->as)
- goto out;
+ goto out;
+found_mmu:
priv = container_of(mmu, struct panfrost_file_priv, mmu);
spin_lock(&priv->mm_lock);
@@ -432,7 +432,8 @@ out:
#define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE)
-int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
+static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
+ u64 addr)
{
int ret, i;
struct panfrost_gem_object *bo;
diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
index 83c57d325ca8..2dba192bf198 100644
--- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
+++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
@@ -16,6 +16,7 @@
#include "panfrost_issues.h"
#include "panfrost_job.h"
#include "panfrost_mmu.h"
+#include "panfrost_perfcnt.h"
#include "panfrost_regs.h"
#define COUNTERS_PER_BLOCK 64
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 460fd98e40a7..a0b382a637a6 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -1958,6 +1958,7 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev)
case 0x682C:
si_pi->cac_weights = cac_weights_cape_verde_pro;
si_pi->dte_data = dte_data_sun_xt;
+ update_dte_from_pl2 = true;
break;
case 0x6825:
case 0x6827:
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 1a5153197fe9..461a7a8129f4 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -23,6 +23,7 @@
#include <linux/kthread.h>
#include <linux/slab.h>
+#include <linux/completion.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
@@ -68,6 +69,8 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
if (!entity->rq_list)
return -ENOMEM;
+ init_completion(&entity->entity_idle);
+
for (i = 0; i < num_rq_list; ++i)
entity->rq_list[i] = rq_list[i];
@@ -286,11 +289,12 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
*/
if (spsc_queue_count(&entity->job_queue)) {
if (sched) {
- /* Park the kernel for a moment to make sure it isn't processing
- * our enity.
+ /*
+ * Wait for thread to idle to make sure it isn't processing
+ * this entity.
*/
- kthread_park(sched->thread);
- kthread_unpark(sched->thread);
+ wait_for_completion(&entity->entity_idle);
+
}
if (entity->dependency) {
dma_fence_remove_callback(entity->dependency,
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 2af64459b3d7..3c57e84222ca 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -47,6 +47,7 @@
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
+#include <linux/completion.h>
#include <uapi/linux/sched/types.h>
#include <drm/drm_print.h>
@@ -134,6 +135,7 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq)
list_for_each_entry_continue(entity, &rq->entities, list) {
if (drm_sched_entity_is_ready(entity)) {
rq->current_entity = entity;
+ reinit_completion(&entity->entity_idle);
spin_unlock(&rq->lock);
return entity;
}
@@ -144,6 +146,7 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq)
if (drm_sched_entity_is_ready(entity)) {
rq->current_entity = entity;
+ reinit_completion(&entity->entity_idle);
spin_unlock(&rq->lock);
return entity;
}
@@ -496,8 +499,10 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
fence = sched->ops->run_job(s_job);
if (IS_ERR_OR_NULL(fence)) {
+ if (IS_ERR(fence))
+ dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
+
s_job->s_fence->parent = NULL;
- dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
} else {
s_job->s_fence->parent = fence;
}
@@ -645,9 +650,13 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
struct drm_sched_job *job;
unsigned long flags;
- /* Don't destroy jobs while the timeout worker is running */
- if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
- !cancel_delayed_work(&sched->work_tdr))
+ /*
+ * Don't destroy jobs while the timeout worker is running OR thread
+ * is being parked and hence assumed to not touch ring_mirror_list
+ */
+ if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
+ !cancel_delayed_work(&sched->work_tdr)) ||
+ __kthread_should_park(sched->thread))
return NULL;
spin_lock_irqsave(&sched->job_list_lock, flags);
@@ -724,6 +733,9 @@ static int drm_sched_main(void *param)
continue;
sched_job = drm_sched_entity_pop_job(entity);
+
+ complete(&entity->entity_idle);
+
if (!sched_job)
continue;
@@ -746,8 +758,9 @@ static int drm_sched_main(void *param)
r);
dma_fence_put(fence);
} else {
+ if (IS_ERR(fence))
+ dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
- dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
drm_sched_process_job(NULL, &sched_job->cb);
}
diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
index 01fc670ce7a2..caea2a099496 100644
--- a/drivers/gpu/drm/ttm/Makefile
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -4,8 +4,8 @@
ttm-y := ttm_memory.o ttm_tt.o ttm_bo.o \
ttm_bo_util.o ttm_bo_vm.o ttm_module.o \
- ttm_execbuf_util.o ttm_page_alloc.o ttm_bo_manager.o \
- ttm_page_alloc_dma.o
+ ttm_execbuf_util.o ttm_page_alloc.o ttm_bo_manager.o
ttm-$(CONFIG_AGP) += ttm_agp_backend.o
+ttm-$(CONFIG_DRM_TTM_DMA_PAGE_POOL) += ttm_page_alloc_dma.o
obj-$(CONFIG_DRM_TTM) += ttm.o
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index ff54e7609e8f..bf876faea592 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -33,7 +33,6 @@
* when freed).
*/
-#if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU)
#define pr_fmt(fmt) "[TTM] " fmt
#include <linux/dma-mapping.h>
@@ -1238,5 +1237,3 @@ int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data)
return 0;
}
EXPORT_SYMBOL_GPL(ttm_dma_page_alloc_debugfs);
-
-#endif
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 4c4b59ae2c81..549dde83408b 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -560,14 +560,17 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
if (args->bcl_start != args->bcl_end) {
bin = kcalloc(1, sizeof(*bin), GFP_KERNEL);
- if (!bin)
+ if (!bin) {
+ v3d_job_put(&render->base);
return -ENOMEM;
+ }
ret = v3d_job_init(v3d, file_priv, &bin->base,
v3d_job_free, args->in_sync_bcl);
if (ret) {
kfree(bin);
v3d_job_put(&render->base);
+ kfree(bin);
return ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 81a95651643f..e962048f65d2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -576,8 +576,7 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
else
dev_priv->map_mode = vmw_dma_map_populate;
- /* No TTM coherent page pool? FIXME: Ask TTM instead! */
- if (!(IS_ENABLED(CONFIG_SWIOTLB) || IS_ENABLED(CONFIG_INTEL_IOMMU)) &&
+ if (!IS_ENABLED(CONFIG_DRM_TTM_DMA_PAGE_POOL) &&
(dev_priv->map_mode == vmw_dma_alloc_coherent))
return -EINVAL;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index 29d8794f0421..de0530b4dc1b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -336,7 +336,6 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res)
{
struct vmw_private *dev_priv = res->dev_priv;
- struct vmw_surface *srf;
void *cmd;
if (res->func->destroy == vmw_gb_surface_destroy) {
@@ -360,7 +359,6 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res)
*/
mutex_lock(&dev_priv->cmdbuf_mutex);
- srf = vmw_res_to_srf(res);
dev_priv->used_memory_size -= res->backup_size;
mutex_unlock(&dev_priv->cmdbuf_mutex);
}
OpenPOWER on IntegriCloud