path: root/drivers/gpu/drm/msm/msm_gpu.c
author    Linus Torvalds <torvalds@linux-foundation.org>    2017-11-15 20:42:10 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>    2017-11-15 20:42:10 -0800
commit    e60e1ee60630cafef5e430c2ae364877e061d980 (patch)
tree      816aeef8fe8d4a2c6a1ebbc7a350839bac8dd4c2    /drivers/gpu/drm/msm/msm_gpu.c
parent    5d352e69c60e54b5f04d6e337a1d2bf0dbf3d94a (diff)
parent    f150891fd9878ef0d9197c4e8451ce67c3bdd014 (diff)
Merge tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
 "This is the main drm pull request for v4.15.

  Core:
   - Atomic object lifetime fixes
   - Atomic iterator improvements
   - Sparse/smatch fixes
   - Legacy kms ioctls to be interruptible
   - EDID override improvements
   - fb/gem helper cleanups
   - Simple outreachy patches
   - Documentation improvements
   - Fix dma-buf rcu races
   - DRM mode object leasing for improving VR use cases.
   - vgaarb improvements for non-x86 platforms.

  New driver:
   - tve200: Faraday Technology TVE200 block.

     This "TV Encoder" encodes a ITU-T BT.656 stream and can be found in
     the StorLink SL3516 (later Cortina Systems CS3516) as well as the
     Grain Media GM8180.

  New bridges:
   - SiI9234 support

  New panels:
   - S6E63J0X03, OTM8009A, Seiko 43WVF1G, 7" rpi touch panel, Toshiba
     LT089AC19000, Innolux AT043TN24

  i915:
   - Remove Coffeelake from alpha support
   - Cannonlake workarounds
   - Infoframe refactoring for DisplayPort
   - VBT updates
   - DisplayPort vswing/emph/buffer translation refactoring
   - CCS fixes
   - Restore GPU clock boost on missed vblanks
   - Scatter list updates for userptr allocations
   - Gen9+ transition watermarks
   - Display IPC (Isochronous Priority Control)
   - Private PAT management
   - GVT: improved error handling and pci config sanitizing
   - Execlist refactoring
   - Transparent Huge Page support
   - User defined priorities support
   - HuC/GuC firmware refactoring
   - DP MST fixes
   - eDP power sequencing fixes
   - Use RCU instead of stop_machine
   - PSR state tracking support
   - Eviction fixes
   - BDW DP aux channel timeout fixes
   - LSPCON fixes
   - Cannonlake PLL fixes

  amdgpu:
   - Per VM BO support
   - Powerplay cleanups
   - CI powerplay support
   - PASID mgr for kfd
   - SR-IOV fixes
   - initial GPU reset for vega10
   - Prime mmap support
   - TTM updates
   - Clock query interface for Raven
   - Fence to handle ioctl
   - UVD encode ring support on Polaris
   - Transparent huge page DMA support
   - Compute LRU pipe tweaks
   - BO flag to allow buffers to opt out of implicit sync
   - CTX priority setting API
   - VRAM lost infrastructure plumbing

  qxl:
   - fix flicker since atomic rework

  amdkfd:
   - Further improvements from internal AMD tree
   - Usermode events
   - Drop radeon support

  nouveau:
   - Pascal temperature sensor support
   - Improved BAR2 handling
   - MMU rework to support Pascal MMU

  exynos:
   - Improved HDMI/mixer support
   - HDMI audio interface support

  tegra:
   - Prep work for tegra186
   - Cleanup/fixes

  msm:
   - Preemption support for a5xx
   - Display fixes for 8x96 (snapdragon 820)
   - Async cursor plane fixes
   - FW loading rework
   - GPU debugging improvements

  vc4:
   - Prep for DSI panels
   - fix T-format tiling scanout
   - New madvise ioctl

  Rockchip:
   - LVDS support

  omapdrm:
   - omap4 HDMI CEC support

  etnaviv:
   - GPU performance counters groundwork

  sun4i:
   - refactor driver load + TCON backend
   - HDMI improvements
   - A31 support
   - Misc fixes

  udl:
   - Probe/EDID read fixes.

  tilcdc:
   - Misc fixes.

  pl111:
   - Support more variants

  adv7511:
   - Improve EDID handling.
   - HDMI CEC support

  sii8620:
   - Add remote control support"

* tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux: (1480 commits)
  drm/rockchip: analogix_dp: Use mutex rather than spinlock
  drm/mode_object: fix documentation for object lookups.
  drm/i915: Reorder context-close to avoid calling i915_vma_close() under RCU
  drm/i915: Move init_clock_gating() back to where it was
  drm/i915: Prune the reservation shared fence array
  drm/i915: Idle the GPU before shinking everything
  drm/i915: Lock llist_del_first() vs llist_del_all()
  drm/i915: Calculate ironlake intermediate watermarks correctly, v2.
  drm/i915: Disable lazy PPGTT page table optimization for vGPU
  drm/i915/execlists: Remove the priority "optimisation"
  drm/i915: Filter out spurious execlists context-switch interrupts
  drm/amdgpu: use irq-safe lock for kiq->ring_lock
  drm/amdgpu: bypass lru touch for KIQ ring submission
  drm/amdgpu: Potential uninitialized variable in amdgpu_vm_update_directories()
  drm/amdgpu: potential uninitialized variable in amdgpu_vce_ring_parse_cs()
  drm/amd/powerplay: initialize a variable before using it
  drm/amd/powerplay: suppress KASAN out of bounds warning in vega10_populate_all_memory_levels
  drm/amd/amdgpu: fix evicted VRAM bo adjudgement condition
  drm/vblank: Tune drm_crtc_accurate_vblank_count() WARN down to a debug
  drm/rockchip: add CONFIG_OF dependency for lvds
  ...
Diffstat (limited to 'drivers/gpu/drm/msm/msm_gpu.c')
-rw-r--r--    drivers/gpu/drm/msm/msm_gpu.c    235
1 files changed, 177 insertions, 58 deletions
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 6a887032c66a..8d4477818ec2 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -20,6 +20,8 @@
#include "msm_mmu.h"
#include "msm_fence.h"
+#include <linux/string_helpers.h>
+
/*
* Power Management:
@@ -221,33 +223,102 @@ int msm_gpu_hw_init(struct msm_gpu *gpu)
* Hangcheck detection for locked gpu:
*/
+static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+ uint32_t fence)
+{
+ struct msm_gem_submit *submit;
+
+ list_for_each_entry(submit, &ring->submits, node) {
+ if (submit->seqno > fence)
+ break;
+
+ msm_update_fence(submit->ring->fctx,
+ submit->fence->seqno);
+ }
+}
+
+static struct msm_gem_submit *
+find_submit(struct msm_ringbuffer *ring, uint32_t fence)
+{
+ struct msm_gem_submit *submit;
+
+ WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));
+
+ list_for_each_entry(submit, &ring->submits, node)
+ if (submit->seqno == fence)
+ return submit;
+
+ return NULL;
+}
+
static void retire_submits(struct msm_gpu *gpu);
static void recover_worker(struct work_struct *work)
{
struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
struct drm_device *dev = gpu->dev;
+ struct msm_drm_private *priv = dev->dev_private;
struct msm_gem_submit *submit;
- uint32_t fence = gpu->funcs->last_fence(gpu);
-
- msm_update_fence(gpu->fctx, fence + 1);
+ struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
+ int i;
mutex_lock(&dev->struct_mutex);
dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
- list_for_each_entry(submit, &gpu->submit_list, node) {
- if (submit->fence->seqno == (fence + 1)) {
- struct task_struct *task;
-
- rcu_read_lock();
- task = pid_task(submit->pid, PIDTYPE_PID);
- if (task) {
- dev_err(dev->dev, "%s: offending task: %s\n",
- gpu->name, task->comm);
- }
- rcu_read_unlock();
- break;
+
+ submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
+ if (submit) {
+ struct task_struct *task;
+
+ rcu_read_lock();
+ task = pid_task(submit->pid, PIDTYPE_PID);
+ if (task) {
+ char *cmd;
+
+ /*
+ * So slightly annoying, in other paths like
+ * mmap'ing gem buffers, mmap_sem is acquired
+ * before struct_mutex, which means we can't
+ * hold struct_mutex across the call to
+ * get_cmdline(). But submits are retired
+ * from the same in-order workqueue, so we can
+ * safely drop the lock here without worrying
+ * about the submit going away.
+ */
+ mutex_unlock(&dev->struct_mutex);
+ cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
+ mutex_lock(&dev->struct_mutex);
+
+ dev_err(dev->dev, "%s: offending task: %s (%s)\n",
+ gpu->name, task->comm, cmd);
+
+ msm_rd_dump_submit(priv->hangrd, submit,
+ "offending task: %s (%s)", task->comm, cmd);
+ } else {
+ msm_rd_dump_submit(priv->hangrd, submit, NULL);
}
+ rcu_read_unlock();
+ }
+
+
+ /*
+ * Update all the rings with the latest and greatest fence.. this
+ * needs to happen after msm_rd_dump_submit() to ensure that the
+ * bo's referenced by the offending submit are still around.
+ */
+ for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
+ struct msm_ringbuffer *ring = gpu->rb[i];
+
+ uint32_t fence = ring->memptrs->fence;
+
+ /*
+ * For the current (faulting?) ring/submit advance the fence by
+ * one more to clear the faulting submit
+ */
+ if (ring == cur_ring)
+ fence++;
+
+ update_fences(gpu, ring, fence);
}
if (msm_gpu_active(gpu)) {
@@ -258,9 +329,15 @@ static void recover_worker(struct work_struct *work)
gpu->funcs->recover(gpu);
pm_runtime_put_sync(&gpu->pdev->dev);
- /* replay the remaining submits after the one that hung: */
- list_for_each_entry(submit, &gpu->submit_list, node) {
- gpu->funcs->submit(gpu, submit, NULL);
+ /*
+ * Replay all remaining submits starting with highest priority
+ * ring
+ */
+ for (i = 0; i < gpu->nr_rings; i++) {
+ struct msm_ringbuffer *ring = gpu->rb[i];
+
+ list_for_each_entry(submit, &ring->submits, node)
+ gpu->funcs->submit(gpu, submit, NULL);
}
}
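
For context on the recover_worker() hunks above: once a hang is detected, the worker looks up the submit the active ring stalled on (one past the last fence the GPU wrote back), dumps it through msm_rd_dump_submit(), temporarily dropping struct_mutex around get_cmdline() because mmap_sem is taken before struct_mutex on other paths, then advances every ring's fence, adding one extra on the hung ring so the faulting submit is skipped, and finally replays the surviving submits starting with the highest-priority ring. The standalone C sketch below models only that bookkeeping; it is not driver code, it folds fence signalling and submit retirement into a single step, and all of its names (struct ring, struct submit, recover()) are illustrative stand-ins.

#include <stdint.h>
#include <stdio.h>

#define NR_RINGS    4
#define MAX_PENDING 8

struct submit {
	uint32_t seqno;
	int pending;
};

struct ring {
	uint32_t seqno;            /* last seqno handed out at submit time */
	uint32_t completed_fence;  /* last value the GPU wrote back */
	struct submit submits[MAX_PENDING];
};

/* Folds "signal fences up to <= fence" and "drop retired submits" into one step. */
static void update_fences(struct ring *ring, uint32_t fence)
{
	for (int i = 0; i < MAX_PENDING; i++)
		if (ring->submits[i].pending && ring->submits[i].seqno <= fence)
			ring->submits[i].pending = 0;
}

static void recover(struct ring *rings, int hung)
{
	/* advance every ring to what the GPU completed; skip one extra on the hung ring */
	for (int i = 0; i < NR_RINGS; i++) {
		uint32_t fence = rings[i].completed_fence;

		if (i == hung)
			fence++;
		update_fences(&rings[i], fence);
	}

	/* replay whatever survived, highest-priority (lowest index) ring first */
	for (int i = 0; i < NR_RINGS; i++)
		for (int j = 0; j < MAX_PENDING; j++)
			if (rings[i].submits[j].pending)
				printf("replay ring %d seqno %u\n",
				       i, rings[i].submits[j].seqno);
}

int main(void)
{
	struct ring rings[NR_RINGS] = { 0 };

	/* ring 0: seqno 1 completed, seqno 2 hung, seqno 3 still queued */
	rings[0].seqno = 3;
	rings[0].completed_fence = 1;
	rings[0].submits[0] = (struct submit){ .seqno = 2, .pending = 1 };
	rings[0].submits[1] = (struct submit){ .seqno = 3, .pending = 1 };

	recover(rings, 0);	/* prints: replay ring 0 seqno 3 */
	return 0;
}
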
@@ -281,25 +358,27 @@ static void hangcheck_handler(unsigned long data)
struct msm_gpu *gpu = (struct msm_gpu *)data;
struct drm_device *dev = gpu->dev;
struct msm_drm_private *priv = dev->dev_private;
- uint32_t fence = gpu->funcs->last_fence(gpu);
+ struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
+ uint32_t fence = ring->memptrs->fence;
- if (fence != gpu->hangcheck_fence) {
+ if (fence != ring->hangcheck_fence) {
/* some progress has been made.. ya! */
- gpu->hangcheck_fence = fence;
- } else if (fence < gpu->fctx->last_fence) {
+ ring->hangcheck_fence = fence;
+ } else if (fence < ring->seqno) {
/* no progress and not done.. hung! */
- gpu->hangcheck_fence = fence;
- dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n",
- gpu->name);
+ ring->hangcheck_fence = fence;
+ dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
+ gpu->name, ring->id);
dev_err(dev->dev, "%s: completed fence: %u\n",
gpu->name, fence);
dev_err(dev->dev, "%s: submitted fence: %u\n",
- gpu->name, gpu->fctx->last_fence);
+ gpu->name, ring->seqno);
+
queue_work(priv->wq, &gpu->recover_work);
}
/* if still more pending work, reset the hangcheck timer: */
- if (gpu->fctx->last_fence > gpu->hangcheck_fence)
+ if (ring->seqno > ring->hangcheck_fence)
hangcheck_timer_reset(gpu);
/* workaround for missing irq: */
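
With per-ring fences, the hangcheck timer above compares three values for the active ring: the fence the GPU last wrote back (ring->memptrs->fence), the last seqno handed out at submit time (ring->seqno), and what the previous timer tick recorded (ring->hangcheck_fence). A hang is declared only when the write-back has not moved and work is still outstanding; otherwise the timer is re-armed as long as ring->seqno is ahead of the recorded fence. A minimal userspace sketch of that decision, using hypothetical names (check_ring, RING_HUNG) rather than the driver's:

#include <stdint.h>
#include <stdio.h>

enum hang_state { RING_PROGRESS, RING_IDLE, RING_HUNG };

/*
 * completed: last fence value the GPU wrote back (ring->memptrs->fence)
 * submitted: last seqno handed out at submit time (ring->seqno)
 * last_seen: value recorded by the previous timer tick (ring->hangcheck_fence)
 */
static enum hang_state check_ring(uint32_t completed, uint32_t submitted,
				  uint32_t *last_seen)
{
	if (completed != *last_seen) {
		*last_seen = completed;  /* some progress has been made */
		return RING_PROGRESS;
	}
	if (completed < submitted) {
		*last_seen = completed;
		return RING_HUNG;        /* no progress and work still pending */
	}
	return RING_IDLE;                /* nothing outstanding, no need to re-arm */
}

int main(void)
{
	uint32_t last_seen = 5;

	/* GPU stuck at fence 5 while seqno 7 was already submitted -> hung */
	printf("%d\n", check_ring(5, 7, &last_seen) == RING_HUNG);
	return 0;
}
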
@@ -428,19 +507,18 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
static void retire_submits(struct msm_gpu *gpu)
{
struct drm_device *dev = gpu->dev;
+ struct msm_gem_submit *submit, *tmp;
+ int i;
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
- while (!list_empty(&gpu->submit_list)) {
- struct msm_gem_submit *submit;
-
- submit = list_first_entry(&gpu->submit_list,
- struct msm_gem_submit, node);
+ /* Retire the commits starting with highest priority */
+ for (i = 0; i < gpu->nr_rings; i++) {
+ struct msm_ringbuffer *ring = gpu->rb[i];
- if (dma_fence_is_signaled(submit->fence)) {
- retire_submit(gpu, submit);
- } else {
- break;
+ list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
+ if (dma_fence_is_signaled(submit->fence))
+ retire_submit(gpu, submit);
}
}
}
@@ -449,9 +527,10 @@ static void retire_worker(struct work_struct *work)
{
struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
struct drm_device *dev = gpu->dev;
- uint32_t fence = gpu->funcs->last_fence(gpu);
+ int i;
- msm_update_fence(gpu->fctx, fence);
+ for (i = 0; i < gpu->nr_rings; i++)
+ update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);
mutex_lock(&dev->struct_mutex);
retire_submits(gpu);
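
retire_submits() above no longer stops at the first unsignaled entry of a single global submit_list; each ring's list is walked on its own (with list_for_each_entry_safe(), since retire_submit() unlinks entries), so a stalled low-priority ring no longer holds back retirement on the others. Below is a rough standalone model of that per-ring walk, assuming a fixed array in place of the kernel list and a plain seqno comparison in place of dma_fence_is_signaled(); none of these names come from the driver.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_RINGS    2
#define MAX_PENDING 4

struct submit {
	uint32_t seqno;
	bool pending;
};

/* stand-in for dma_fence_is_signaled(): done once the write-back fence catches up */
static bool submit_signaled(const struct submit *s, uint32_t completed_fence)
{
	return s->seqno <= completed_fence;
}

static void retire_submits(struct submit submits[NR_RINGS][MAX_PENDING],
			   const uint32_t completed[NR_RINGS])
{
	/* walk every ring; a stalled ring no longer blocks retiring the others */
	for (int i = 0; i < NR_RINGS; i++)
		for (int j = 0; j < MAX_PENDING; j++)
			if (submits[i][j].pending &&
			    submit_signaled(&submits[i][j], completed[i]))
				submits[i][j].pending = false;
}

int main(void)
{
	struct submit submits[NR_RINGS][MAX_PENDING] = {
		{ { .seqno = 1, .pending = true } },  /* ring 0: GPU stuck at fence 0 */
		{ { .seqno = 1, .pending = true } },  /* ring 1: GPU completed fence 1 */
	};
	uint32_t completed[NR_RINGS] = { 0, 1 };

	retire_submits(submits, completed);
	printf("ring0 pending=%d ring1 pending=%d\n",
	       submits[0][0].pending, submits[1][0].pending);  /* 1 and 0 */
	return 0;
}
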
@@ -472,6 +551,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
{
struct drm_device *dev = gpu->dev;
struct msm_drm_private *priv = dev->dev_private;
+ struct msm_ringbuffer *ring = submit->ring;
int i;
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
@@ -480,9 +560,11 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
msm_gpu_hw_init(gpu);
- list_add_tail(&submit->node, &gpu->submit_list);
+ submit->seqno = ++ring->seqno;
+
+ list_add_tail(&submit->node, &ring->submits);
- msm_rd_dump_submit(submit);
+ msm_rd_dump_submit(priv->rd, submit, NULL);
update_sw_cntrs(gpu);
@@ -605,7 +687,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
const char *name, struct msm_gpu_config *config)
{
- int ret;
+ int i, ret, nr_rings = config->nr_rings;
+ void *memptrs;
+ uint64_t memptrs_iova;
if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);
@@ -613,18 +697,11 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
gpu->dev = drm;
gpu->funcs = funcs;
gpu->name = name;
- gpu->fctx = msm_fence_context_alloc(drm, name);
- if (IS_ERR(gpu->fctx)) {
- ret = PTR_ERR(gpu->fctx);
- gpu->fctx = NULL;
- goto fail;
- }
INIT_LIST_HEAD(&gpu->active_list);
INIT_WORK(&gpu->retire_work, retire_worker);
INIT_WORK(&gpu->recover_work, recover_worker);
- INIT_LIST_HEAD(&gpu->submit_list);
setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
(unsigned long)gpu);
@@ -689,34 +766,76 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
goto fail;
}
- /* Create ringbuffer: */
- gpu->rb = msm_ringbuffer_new(gpu, config->ringsz);
- if (IS_ERR(gpu->rb)) {
- ret = PTR_ERR(gpu->rb);
- gpu->rb = NULL;
- dev_err(drm->dev, "could not create ringbuffer: %d\n", ret);
+ memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo),
+ MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
+ &memptrs_iova);
+
+ if (IS_ERR(memptrs)) {
+ ret = PTR_ERR(memptrs);
+ dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
goto fail;
}
+ if (nr_rings > ARRAY_SIZE(gpu->rb)) {
+ DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
+ ARRAY_SIZE(gpu->rb));
+ nr_rings = ARRAY_SIZE(gpu->rb);
+ }
+
+ /* Create ringbuffer(s): */
+ for (i = 0; i < nr_rings; i++) {
+ gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);
+
+ if (IS_ERR(gpu->rb[i])) {
+ ret = PTR_ERR(gpu->rb[i]);
+ dev_err(drm->dev,
+ "could not create ringbuffer %d: %d\n", i, ret);
+ goto fail;
+ }
+
+ memptrs += sizeof(struct msm_rbmemptrs);
+ memptrs_iova += sizeof(struct msm_rbmemptrs);
+ }
+
+ gpu->nr_rings = nr_rings;
+
return 0;
fail:
+ for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
+ msm_ringbuffer_destroy(gpu->rb[i]);
+ gpu->rb[i] = NULL;
+ }
+
+ if (gpu->memptrs_bo) {
+ msm_gem_put_vaddr(gpu->memptrs_bo);
+ msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
+ drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
+ }
+
platform_set_drvdata(pdev, NULL);
return ret;
}
void msm_gpu_cleanup(struct msm_gpu *gpu)
{
+ int i;
+
DBG("%s", gpu->name);
WARN_ON(!list_empty(&gpu->active_list));
bs_fini(gpu);
- if (gpu->rb) {
- if (gpu->rb_iova)
- msm_gem_put_iova(gpu->rb->bo, gpu->aspace);
- msm_ringbuffer_destroy(gpu->rb);
+ for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
+ msm_ringbuffer_destroy(gpu->rb[i]);
+ gpu->rb[i] = NULL;
+ }
+
+ if (gpu->memptrs_bo) {
+ msm_gem_put_vaddr(gpu->memptrs_bo);
+ msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
+ drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
}
if (!IS_ERR_OR_NULL(gpu->aspace)) {
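
One structural point worth calling out from the msm_gpu_init() hunk: instead of a per-GPU fence context and a single ringbuffer, the driver now allocates one uncached memptrs BO and hands each ring a slice of it, passing the current CPU pointer and iova to msm_ringbuffer_new() and advancing both by sizeof(struct msm_rbmemptrs) per iteration. The sketch below illustrates only that carving pattern in plain C; struct rbmemptrs, struct ringbuf and create_rings() are stand-ins for the driver's types, and the iova value is made up.

#include <stdint.h>
#include <stdlib.h>

struct rbmemptrs {
	volatile uint32_t fence;  /* written back by the GPU as submits complete */
	volatile uint32_t rptr;
};

struct ringbuf {
	int id;
	struct rbmemptrs *memptrs;  /* CPU view of this ring's slice */
	uint64_t memptrs_iova;      /* device address of the same slice */
};

static void create_rings(struct ringbuf *rb, int nr_rings,
			 void *shared_cpu, uint64_t shared_iova)
{
	for (int i = 0; i < nr_rings; i++) {
		rb[i].id = i;
		rb[i].memptrs = (struct rbmemptrs *)shared_cpu;
		rb[i].memptrs_iova = shared_iova;

		/* advance both views by one record for the next ring */
		shared_cpu = (char *)shared_cpu + sizeof(struct rbmemptrs);
		shared_iova += sizeof(struct rbmemptrs);
	}
}

int main(void)
{
	struct ringbuf rb[2];
	static struct rbmemptrs shared[2];       /* stands in for the single GEM BO */

	create_rings(rb, 2, shared, 0x1000000);  /* hypothetical iova */
	return rb[1].memptrs == &shared[1] ? 0 : 1;
}

Keeping all the write-back records in one BO means a single allocation and a single iova mapping per GPU, with each ring addressing its own record at a fixed offset.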