drm/nv50: drop explicit yields in favour of smaller PFIFO timeslice

This gives a small but noticeable performance gain at lower performance levels, and leaves performance unchanged at the higher ones.

With this commit, we're now using the same timeslice size as the NVIDIA binary driver currently does, and dropping an unknown bit that NVIDIA no longer appear to set.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
author: Ben Skeggs <bskeggs@redhat.com> 2011-02-02 14:57:05 +1000
committer: Ben Skeggs <bskeggs@redhat.com> 2011-02-25 06:44:30 +1000
commit: ec23802d616f4e33476cca5c7a975ce1682ad2d7 (patch)
tree: f712a215c8ca5aa69f61fd6d30b990b101bf68b3 /drivers/gpu/drm/nouveau/nouveau_fence.c
parent: e3b7ed5e9972dd4878a5390fd3147a973cbe2d05 (diff)
download: talos-obmc-linux-ec23802d616f4e33476cca5c7a975ce1682ad2d7.tar.gz
talos-obmc-linux-ec23802d616f4e33476cca5c7a975ce1682ad2d7.zip
1 files changed, 6 insertions, 46 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 7eef3a11aaa1..8b46392b0ca9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -330,18 +330,9 @@ semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
 	int ret;
 
 	if (dev_priv->chipset < 0x84) {
-		if (dev_priv->chipset < 0x50) {
-			ret = RING_SPACE(chan, 3);
-			if (ret)
-				return ret;
-		} else {
-			ret = RING_SPACE(chan, 5);
-			if (ret)
-				return ret;
-
-			BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
-			OUT_RING  (chan, 0);
-		}
+		ret = RING_SPACE(chan, 3);
+		if (ret)
+			return ret;
 
 		BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 2);
 		OUT_RING  (chan, sema->mem->start);
@@ -351,29 +342,10 @@ semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
 		struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
 		u64 offset = vma->offset + sema->mem->start;
 
-		/*
-		 * NV50 tries to be too smart and context-switch
-		 * between semaphores instead of doing a "first come,
-		 * first served" strategy like previous cards
-		 * do.
-		 *
-		 * That's bad because the ACQUIRE latency can get as
-		 * large as the PFIFO context time slice in the
-		 * typical DRI2 case where you have several
-		 * outstanding semaphores at the same moment.
-		 *
-		 * If we're going to ACQUIRE, force the card to
-		 * context switch before, just in case the matching
-		 * RELEASE is already scheduled to be executed in
-		 * another channel.
-		 */
-
-		ret = RING_SPACE(chan, 7);
+		ret = RING_SPACE(chan, 5);
 		if (ret)
 			return ret;
 
-		BEGIN_RING(chan, NvSubSw, 0x0080, 1);
-		OUT_RING  (chan, 0);
 		BEGIN_RING(chan, NvSubSw, 0x0010, 4);
 		OUT_RING  (chan, upper_32_bits(offset));
 		OUT_RING  (chan, lower_32_bits(offset));
@@ -413,7 +385,7 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
 	int ret;
 
 	if (dev_priv->chipset < 0x84) {
-		ret = RING_SPACE(chan, (dev_priv->chipset != 0x50) ? 4 : 6);
+		ret = RING_SPACE(chan, 4);
 		if (ret)
 			return ret;
 
@@ -421,22 +393,12 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
 		OUT_RING  (chan, sema->mem->start);
 		BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_RELEASE, 1);
 		OUT_RING  (chan, 1);
-		if (dev_priv->chipset == 0x50) {
-			BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
-			OUT_RING  (chan, 0);
-		}
 	} else
 	if (dev_priv->chipset < 0xc0) {
 		struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
 		u64 offset = vma->offset + sema->mem->start;
 
-		/*
-		 * Emits release and forces the card to context switch right
-		 * afterwards, there may be another channel waiting for the
-		 * semaphore
-		 */
-
-		ret = RING_SPACE(chan, 7);
+		ret = RING_SPACE(chan, 5);
 		if (ret)
 			return ret;
 
@@ -445,8 +407,6 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
 		OUT_RING  (chan, lower_32_bits(offset));
 		OUT_RING  (chan, 1);
 		OUT_RING  (chan, 2); /* RELEASE */
-		BEGIN_RING(chan, NvSubSw, 0x0080, 1);
-		OUT_RING  (chan, 0);
 	} else {
 		struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
 		u64 offset = vma->offset + sema->mem->start;
author	Ben Skeggs <bskeggs@redhat.com>	2011-02-02 14:57:05 +1000
committer	Ben Skeggs <bskeggs@redhat.com>	2011-02-25 06:44:30 +1000
commit	ec23802d616f4e33476cca5c7a975ce1682ad2d7 (patch)
tree	f712a215c8ca5aa69f61fd6d30b990b101bf68b3 /drivers/gpu/drm/nouveau/nouveau_fence.c
parent	e3b7ed5e9972dd4878a5390fd3147a973cbe2d05 (diff)
download	talos-obmc-linux-ec23802d616f4e33476cca5c7a975ce1682ad2d7.tar.gz talos-obmc-linux-ec23802d616f4e33476cca5c7a975ce1682ad2d7.zip