summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorMarek Olsak <marek.olsak@amd.com>2016-06-13 16:05:57 +0000
committerMarek Olsak <marek.olsak@amd.com>2016-06-13 16:05:57 +0000
commite93f6d69233851e374819dd9066b2c72104e0ac7 (patch)
treebb07770243845b268bb72a677e933bc2522fc06c /llvm/lib/Target
parentd91532725ec28138024d4a20646f208815d1f187 (diff)
downloadbcm5719-llvm-e93f6d69233851e374819dd9066b2c72104e0ac7.tar.gz
bcm5719-llvm-e93f6d69233851e374819dd9066b2c72104e0ac7.zip
AMDGPU/SI: Set INDEX_STRIDE for scratch coalescing
Summary: Mesa and other users must set this to enable coalescing: - STRIDE = 0 - SWIZZLE_ENABLE = 1 This makes one particular compute shader 8x faster. Reviewers: tstellarAMD, arsenm Subscribers: arsenm, kzhuravl Differential Revision: http://reviews.llvm.org/D21136 llvm-svn: 272556
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h5
2 files changed, 6 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 24add83e569..af246c07bf7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3095,7 +3095,9 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
- Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT);
+ Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT) |
+ // IndexStride = 64
+ (UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT);
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
// Clear them unless we want a huge stride.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index ce1aa2871e7..a20b8178888 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -547,8 +547,9 @@ namespace AMDGPU {
int getAtomicNoRetOp(uint16_t Opcode);
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
- const uint64_t RSRC_TID_ENABLE = 1LL << 55;
- const uint64_t RSRC_ELEMENT_SIZE_SHIFT = 51;
+ const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
+ const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
+ const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
} // End namespace AMDGPU
namespace SI {
OpenPOWER on IntegriCloud