diff options
| author | Marek Olsak <marek.olsak@amd.com> | 2016-06-13 16:05:57 +0000 |
|---|---|---|
| committer | Marek Olsak <marek.olsak@amd.com> | 2016-06-13 16:05:57 +0000 |
| commit | e93f6d69233851e374819dd9066b2c72104e0ac7 (patch) | |
| tree | bb07770243845b268bb72a677e933bc2522fc06c /llvm/lib/Target | |
| parent | d91532725ec28138024d4a20646f208815d1f187 (diff) | |
| download | bcm5719-llvm-e93f6d69233851e374819dd9066b2c72104e0ac7.tar.gz bcm5719-llvm-e93f6d69233851e374819dd9066b2c72104e0ac7.zip | |
AMDGPU/SI: Set INDEX_STRIDE for scratch coalescing
Summary:
Mesa and other users must set the following to enable coalescing:
- STRIDE = 0
- SWIZZLE_ENABLE = 1
This makes one particular compute shader 8x faster.
Reviewers: tstellarAMD, arsenm
Subscribers: arsenm, kzhuravl
Differential Revision: http://reviews.llvm.org/D21136
llvm-svn: 272556
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 5 |
2 files changed, 6 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 24add83e569..af246c07bf7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3095,7 +3095,9 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
   uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
-  Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT);
+  Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT) |
+            // IndexStride = 64
+            (UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT);
   // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
   // Clear them unless we want a huge stride.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index ce1aa2871e7..a20b8178888 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -547,8 +547,9 @@ namespace AMDGPU {
   int getAtomicNoRetOp(uint16_t Opcode);
   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
-  const uint64_t RSRC_TID_ENABLE = 1LL << 55;
-  const uint64_t RSRC_ELEMENT_SIZE_SHIFT = 51;
+  const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
+  const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
+  const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
 } // End namespace AMDGPU
 namespace SI {

