diff options
| author | Tom Stellard <thomas.stellard@amd.com> | 2015-12-15 20:55:55 +0000 |
|---|---|---|
| committer | Tom Stellard <thomas.stellard@amd.com> | 2015-12-15 20:55:55 +0000 |
| commit | a6f24c6565e0a8134c32532a77cb72dbbdb6834d (patch) | |
| tree | 8da1c9c44225f6d7cd2f690853b29316e1df33cc /llvm/lib/Target/AMDGPU/SIISelLowering.cpp | |
| parent | 596e97924ae5bd884509d5a99bfd89850cb7a29e (diff) | |
| download | bcm5719-llvm-a6f24c6565e0a8134c32532a77cb72dbbdb6834d.tar.gz bcm5719-llvm-a6f24c6565e0a8134c32532a77cb72dbbdb6834d.zip | |
AMDGPU/SI: Select constant loads with non-uniform addresses to MUBUF instructions
Summary:
We were previously selecting all constant loads to SMRD instructions and legalizing
the SMRDs with non-uniform addresses during the SIFixSGPRCopies pass.
This new solution is simpler and also generates much better code, because
the instruction selector is able to take advantage of all the MUBUF addressing
modes that our legalization pass wasn't able to use.
We also no longer need to generate v_add_* instructions when we
have a uniform pointer and a non-uniform offset, as this is now folded into the
MUBUF instruction during instruction selection.
Reviewers: arsenm
Subscribers: arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D15425
llvm-svn: 255672
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 23 |
1 files changed, 23 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 71864de6957..0e043cb47da 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -504,6 +504,21 @@ bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS); } + +bool SITargetLowering::isMemOpUniform(const SDNode *N) const { + const MemSDNode *MemNode = cast<MemSDNode>(N); + const Value *Ptr = MemNode->getMemOperand()->getValue(); + + // UndefValue means this is a load of a kernel input. These are uniform. + // Sometimes LDS instructions have constant pointers + if (isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || isa<Constant>(Ptr) || + isa<GlobalValue>(Ptr)) + return true; + + const Instruction *I = dyn_cast_or_null<Instruction>(Ptr); + return I && I->getMetadata("amdgpu.uniform"); +} + TargetLoweringBase::LegalizeTypeAction SITargetLowering::getPreferredVectorAction(EVT VT) const { if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16)) @@ -1328,6 +1343,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { switch (Load->getAddressSpace()) { default: break; + case AMDGPUAS::CONSTANT_ADDRESS: + if (isMemOpUniform(Load)) + break; + // Non-uniform loads will be selected to MUBUF instructions, so they + // have the same legalization requirements as global and private + // loads. + // + // Fall-through case AMDGPUAS::GLOBAL_ADDRESS: case AMDGPUAS::PRIVATE_ADDRESS: if (NumElements >= 8) |

