summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorFrancisco Jerez <currojerez@riseup.net>2010-10-02 17:04:46 +0200
committerBen Skeggs <bskeggs@redhat.com>2010-10-05 09:59:28 +1000
commit8af29ccd7917ab448ea7b5cf581fa7b2b4ea3cba (patch)
tree1b572625051c09fccca446db2966f6b07fa837f8 /drivers/gpu
parent647988175234a733cc0d4cf968949344803a77a7 (diff)
downloadtalos-op-linux-8af29ccd7917ab448ea7b5cf581fa7b2b4ea3cba.tar.gz
talos-op-linux-8af29ccd7917ab448ea7b5cf581fa7b2b4ea3cba.zip
drm/nv50: Fix large 3D performance regression caused by the interchannel sync patches.
Reported-by: Christoph Bumiller <e0425955@student.tuwien.ac.at> Signed-off-by: Francisco Jerez <currojerez@riseup.net> Tested-by: Maarten Maathuis <madman2003@gmail.com> Tested-by: Xavier Chantry <chantry.xavier@gmail.com> Tested-by: Ben Skeggs <bskeggs@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.h1
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.c46
2 files changed, 40 insertions, 7 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index a308c132c19b..3a07e580d27a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -1473,6 +1473,7 @@ nv_match_device(struct drm_device *dev, unsigned device,
#define NV_SW_SEMAPHORE_OFFSET 0x00000064
#define NV_SW_SEMAPHORE_ACQUIRE 0x00000068
#define NV_SW_SEMAPHORE_RELEASE 0x0000006c
+#define NV_SW_YIELD 0x00000080
#define NV_SW_DMA_VBLSEM 0x0000018c
#define NV_SW_VBLSEM_OFFSET 0x00000400
#define NV_SW_VBLSEM_RELEASE_VALUE 0x00000404
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index f42675cc9d14..441b12420bb1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -308,21 +308,54 @@ emit_semaphore(struct nouveau_channel *chan, int method,
{
struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
struct nouveau_fence *fence;
+ bool smart = (dev_priv->card_type >= NV_50);
int ret;
- ret = RING_SPACE(chan, dev_priv->card_type >= NV_50 ? 6 : 4);
+ ret = RING_SPACE(chan, smart ? 8 : 4);
if (ret)
return ret;
- if (dev_priv->card_type >= NV_50) {
+ if (smart) {
BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
OUT_RING(chan, NvSema);
}
BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
OUT_RING(chan, sema->mem->start);
+
+ if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) {
+ /*
+ * NV50 tries to be too smart and context-switch
+ * between semaphores instead of doing a "first come,
+ * first served" strategy like previous cards
+ * do.
+ *
+ * That's bad because the ACQUIRE latency can get as
+ * large as the PFIFO context time slice in the
+ * typical DRI2 case where you have several
+ * outstanding semaphores at the same moment.
+ *
+ * If we're going to ACQUIRE, force the card to
+ * context switch before, just in case the matching
+ * RELEASE is already scheduled to be executed in
+ * another channel.
+ */
+ BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
+ OUT_RING(chan, 0);
+ }
+
BEGIN_RING(chan, NvSubSw, method, 1);
OUT_RING(chan, 1);
+ if (smart && method == NV_SW_SEMAPHORE_RELEASE) {
+ /*
+ * Force the card to context switch, there may be
+ * another channel waiting for the semaphore we just
+ * released.
+ */
+ BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
+ OUT_RING(chan, 0);
+ }
+
/* Delay semaphore destruction until its work is done */
ret = nouveau_fence_new(chan, &fence, true);
if (ret)
@@ -355,14 +388,13 @@ nouveau_fence_sync(struct nouveau_fence *fence,
return nouveau_fence_wait(fence, NULL, false, false);
}
- /* Signal the semaphore from chan */
- ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);
- if (ret)
- goto out;
-
/* Make wchan wait until it gets signalled */
ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema);
+ if (ret)
+ goto out;
+ /* Signal the semaphore from chan */
+ ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);
out:
kref_put(&sema->ref, free_semaphore);
return ret;
OpenPOWER on IntegriCloud