| author | Alexander Timofeev <Alexander.Timofeev@amd.com> | 2016-12-08 17:28:47 +0000 |
|---|---|---|
| committer | Alexander Timofeev <Alexander.Timofeev@amd.com> | 2016-12-08 17:28:47 +0000 |
| commit | 18009560c59deceb2be577e0182f7016d6ee1121 (patch) | |
| tree | 05c51c8ad3b3de4f579ea884037cded642bec1b2 /llvm/lib/Target/AMDGPU/SIISelLowering.cpp | |
| parent | eebed6229ad000a34959f4a9794b9e009eb0227d (diff) | |
| download | bcm5719-llvm-18009560c59deceb2be577e0182f7016d6ee1121.tar.gz bcm5719-llvm-18009560c59deceb2be577e0182f7016d6ee1121.zip | |
[AMDGPU] Scalarization of global uniform loads.
Summary:
LC can currently select a scalar load for a uniform memory access
based only on the address being in the read-only memory address space.
This restriction originates from the fact that, in hardware prior to VI,
the vector and scalar caches are not coherent. With
MemoryDependenceAnalysis we can check that the memory location
corresponding to the memory operand of the LOAD is not clobbered along
any path from the function entry.
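The metadata producer is not part of this file's diff. As a rough, hypothetical sketch only: a uniform load could be queried against MemoryDependenceAnalysis and, when nothing in scope may write the location, tagged with the `amdgpu.noclobber` marker that the new SelectionDAG predicate in the diff below keys on. The helper names here are invented for the example, and the query is deliberately simpler than the real analysis (it scans only the load's own block rather than every path from the function entry).

```cpp
// Illustrative sketch only -- not part of this patch. Helper names are
// hypothetical; the real annotation walks every path from the function
// entry, while this version conservatively scans just the load's own block.
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Ask MemoryDependenceAnalysis whether anything between the start of the
// load's block and the load itself may touch the loaded location.
static bool isNotClobberedInOwnBlock(MemoryDependenceResults &MDR,
                                     LoadInst *Load) {
  MemDepResult Dep = MDR.getPointerDependencyFrom(
      MemoryLocation::get(Load), /*isLoad=*/true, Load->getIterator(),
      Load->getParent(), Load);
  // Conservatively accept only the trivially clean cases: any local def,
  // clobber, or unknown result rejects the load.
  return !Dep.isClobber() && !Dep.isDef() && !Dep.isUnknown();
}

// Attach the marker that SITargetLowering::isMemOpHasNoClobberedMemOperand()
// checks for during load lowering.
static void markNoClobber(LoadInst *Load) {
  Load->setMetadata("amdgpu.noclobber", MDNode::get(Load->getContext(), {}));
}
```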
Reviewers: rampitec, tstellarAMD, arsenm
Subscribers: wdng, arsenm, nhaehnle
Differential Revision: https://reviews.llvm.org/D26917
llvm-svn: 289076
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 19 |
1 file changed, 17 insertions(+), 2 deletions(-)
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index da60a0f7bdc..a0184bfefd0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -610,6 +610,13 @@ bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
   return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
 }
 
+bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
+  const MemSDNode *MemNode = cast<MemSDNode>(N);
+  const Value *Ptr = MemNode->getMemOperand()->getValue();
+  const Instruction *I = dyn_cast<Instruction>(Ptr);
+  return I && I->getMetadata("amdgpu.noclobber");
+}
+
 bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
                                             unsigned DestAS) const {
   // Flat -> private/local is a simple truncate.
@@ -2773,11 +2780,19 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     if (isMemOpUniform(Load))
       return SDValue();
     // Non-uniform loads will be selected to MUBUF instructions, so they
-    // have the same legalization requires ments as global and private
+    // have the same legalization requirements as global and private
     // loads.
     //
     LLVM_FALLTHROUGH;
-  case AMDGPUAS::GLOBAL_ADDRESS:
+  case AMDGPUAS::GLOBAL_ADDRESS: {
+    if (isMemOpUniform(Load) && isMemOpHasNoClobberedMemOperand(Load))
+      return SDValue();
+    // Non-uniform loads will be selected to MUBUF instructions, so they
+    // have the same legalization requirements as global and private
+    // loads.
+    //
+  }
+    LLVM_FALLTHROUGH;
   case AMDGPUAS::FLAT_ADDRESS:
     if (NumElements > 4)
       return SplitVectorLoad(Op, DAG);
```
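For context, the new predicate only inspects the IR value behind the load's memory operand for the `amdgpu.noclobber` marker; the actual no-clobber proof happens earlier, at the IR level. The snippet below is a hypothetical, self-contained illustration of that handshake (it does not exercise the AMDGPU backend): it builds a load from an address-space-1 global, attaches the marker the way an annotator would, and reads it back the same way `isMemOpHasNoClobberedMemOperand` does.

```cpp
// Hypothetical, self-contained illustration of the metadata handshake; it is
// not part of the patch and does not run the AMDGPU backend.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("example", Ctx);
  IRBuilder<> B(Ctx);

  // A global in AMDGPU's global address space (1), i.e. a load that
  // LowerLOAD previously always sent down the vector (MUBUF) path.
  auto *GV = new GlobalVariable(M, B.getInt32Ty(), /*isConstant=*/false,
                                GlobalValue::ExternalLinkage,
                                /*Initializer=*/nullptr, "g",
                                /*InsertBefore=*/nullptr,
                                GlobalValue::NotThreadLocal,
                                /*AddressSpace=*/1);

  auto *FTy = FunctionType::get(B.getVoidTy(), /*isVarArg=*/false);
  auto *F = Function::Create(FTy, GlobalValue::ExternalLinkage, "f", &M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

  // The uniform load; an annotation pass that proved no clobber from the
  // entry would attach the marker exactly like this.
  LoadInst *LI = B.CreateLoad(B.getInt32Ty(), GV, "val");
  LI->setMetadata("amdgpu.noclobber", MDNode::get(Ctx, {}));
  B.CreateRetVoid();

  // This mirrors what the new SelectionDAG predicate keys on: the IR
  // instruction behind the memory operand carries "amdgpu.noclobber".
  errs() << "noclobber marker present: "
         << (LI->getMetadata("amdgpu.noclobber") ? "yes" : "no") << "\n";
  return 0;
}
```

The design choice in the diff follows the commit summary: CONSTANT_ADDRESS is read-only, so uniformity alone is enough to use the scalar unit, while GLOBAL_ADDRESS may be written through the incoherent vector cache, so the scalar path additionally requires the no-clobber guarantee.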

