AMDGPU/SI: Select constant loads with non-uniform addresses to MUBUF instructions

Summary: We were previously selecting all constant loads to SMRD instructions and legalizing the SMRDs with non-uniform addresses during the SIFixSGPRCopies pass. This new solution is simpler and also generates much better code, because the instruction selector is able to take advantage of all the MUBUF addressing modes that our legalization pass wasn't able to. We also no longer need to generate v_add_* instructions when we have a uniform pointer and a non-uniform offset, as this is now folded into the MUBUF instruction during instruction selection. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D15425 llvm-svn: 255672
author: Tom Stellard <thomas.stellard@amd.com> 2015-12-15 20:55:55 +0000
committer: Tom Stellard <thomas.stellard@amd.com> 2015-12-15 20:55:55 +0000
commit: a6f24c6565e0a8134c32532a77cb72dbbdb6834d (patch)
tree: 8da1c9c44225f6d7cd2f690853b29316e1df33cc /llvm/lib/Target/AMDGPU/SIISelLowering.cpp
parent: 596e97924ae5bd884509d5a99bfd89850cb7a29e (diff)
download: bcm5719-llvm-a6f24c6565e0a8134c32532a77cb72dbbdb6834d.tar.gz
bcm5719-llvm-a6f24c6565e0a8134c32532a77cb72dbbdb6834d.zip
1 files changed, 23 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 71864de6957..0e043cb47da 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -504,6 +504,21 @@ bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
   return isFlatGlobalAddrSpace(SrcAS) &&  isFlatGlobalAddrSpace(DestAS);
 }
 
+/// Return true if the pointer behind memory node \p N is treated as uniform: an undef, argument, constant or global value, or an instruction carrying "amdgpu.uniform" metadata.
+bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
+  const MemSDNode *MemNode = cast<MemSDNode>(N); // N is expected to be a MemSDNode.
+  const Value *Ptr = MemNode->getMemOperand()->getValue(); // Value attached to the node's memory operand.
+  // NOTE(review): isa<> is applied to Ptr before the dyn_cast_or_null below; confirm Ptr cannot be null here.
+  // UndefValue means this is a load of a kernel input.  These are uniform.
+  // Arguments, constants and globals are accepted as uniform too; LDS instructions sometimes have constant pointers.
+  if (isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || isa<Constant>(Ptr) ||
+      isa<GlobalValue>(Ptr))
+    return true;
+  // Any other pointer counts as uniform only when it is an instruction tagged with "amdgpu.uniform" metadata.
+  const Instruction *I = dyn_cast_or_null<Instruction>(Ptr);
+  return I && I->getMetadata("amdgpu.uniform");
+}
+
 TargetLoweringBase::LegalizeTypeAction
 SITargetLowering::getPreferredVectorAction(EVT VT) const {
   if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
@@ -1328,6 +1343,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
 
     switch (Load->getAddressSpace()) {
       default: break;
+      case AMDGPUAS::CONSTANT_ADDRESS:
+      if (isMemOpUniform(Load))
+        break;
+        // Non-uniform loads will be selected to MUBUF instructions, so they
+        // have the same legalization requirements as global and private
+        // loads.
+        //
+        // Fall-through
       case AMDGPUAS::GLOBAL_ADDRESS:
       case AMDGPUAS::PRIVATE_ADDRESS:
         if (NumElements >= 8)
author	Tom Stellard <thomas.stellard@amd.com>	2015-12-15 20:55:55 +0000
committer	Tom Stellard <thomas.stellard@amd.com>	2015-12-15 20:55:55 +0000
commit	a6f24c6565e0a8134c32532a77cb72dbbdb6834d (patch)
tree	8da1c9c44225f6d7cd2f690853b29316e1df33cc /llvm/lib/Target/AMDGPU/SIISelLowering.cpp
parent	596e97924ae5bd884509d5a99bfd89850cb7a29e (diff)
download	bcm5719-llvm-a6f24c6565e0a8134c32532a77cb72dbbdb6834d.tar.gz bcm5719-llvm-a6f24c6565e0a8134c32532a77cb72dbbdb6834d.zip