[AMDGPU] Scalarization of global uniform loads.

Summary: LC can currently select a scalar load for a uniform memory access based only on the read-only memory address space. This restriction originated from the fact that in HW prior to VI the vector and scalar caches are not coherent. With MemoryDependenceAnalysis we can check that the memory location corresponding to the memory operand of the LOAD is not clobbered along all paths from the function entry. Reviewers: rampitec, tstellarAMD, arsenm Subscribers: wdng, arsenm, nhaehnle Differential Revision: https://reviews.llvm.org/D26917 llvm-svn: 289076
author: Alexander Timofeev <Alexander.Timofeev@amd.com> 2016-12-08 17:28:47 +0000
committer: Alexander Timofeev <Alexander.Timofeev@amd.com> 2016-12-08 17:28:47 +0000
commit: 18009560c59deceb2be577e0182f7016d6ee1121 (patch)
tree: 05c51c8ad3b3de4f579ea884037cded642bec1b2 /llvm/lib/Target/AMDGPU/SIISelLowering.cpp
parent: eebed6229ad000a34959f4a9794b9e009eb0227d (diff)
download: bcm5719-llvm-18009560c59deceb2be577e0182f7016d6ee1121.tar.gz
bcm5719-llvm-18009560c59deceb2be577e0182f7016d6ee1121.zip
1 files changed, 17 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index da60a0f7bdc..a0184bfefd0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -610,6 +610,13 @@ bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
   return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
 }
 
+bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
+  const MemSDNode *MemNode = cast<MemSDNode>(N);
+  const Value *Ptr = MemNode->getMemOperand()->getValue();
+  const Instruction *I = dyn_cast<Instruction>(Ptr);
+  return I && I->getMetadata("amdgpu.noclobber");
+}
+
 bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
                                             unsigned DestAS) const {
   // Flat -> private/local is a simple truncate.
@@ -2773,11 +2780,19 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     if (isMemOpUniform(Load))
       return SDValue();
     // Non-uniform loads will be selected to MUBUF instructions, so they
-    // have the same legalization requires ments as global and private
+    // have the same legalization requirements as global and private
     // loads.
     //
     LLVM_FALLTHROUGH;
-  case AMDGPUAS::GLOBAL_ADDRESS:
+  case AMDGPUAS::GLOBAL_ADDRESS: {
+    if (isMemOpUniform(Load) && isMemOpHasNoClobberedMemOperand(Load))
+      return SDValue();
+    // Non-uniform loads will be selected to MUBUF instructions, so they
+    // have the same legalization requirements as global and private
+    // loads.
+    //
+  }
+    LLVM_FALLTHROUGH;
   case AMDGPUAS::FLAT_ADDRESS:
     if (NumElements > 4)
       return SplitVectorLoad(Op, DAG);
author	Alexander Timofeev <Alexander.Timofeev@amd.com>	2016-12-08 17:28:47 +0000
committer	Alexander Timofeev <Alexander.Timofeev@amd.com>	2016-12-08 17:28:47 +0000
commit	18009560c59deceb2be577e0182f7016d6ee1121 (patch)
tree	05c51c8ad3b3de4f579ea884037cded642bec1b2 /llvm/lib/Target/AMDGPU/SIISelLowering.cpp
parent	eebed6229ad000a34959f4a9794b9e009eb0227d (diff)
download	bcm5719-llvm-18009560c59deceb2be577e0182f7016d6ee1121.tar.gz bcm5719-llvm-18009560c59deceb2be577e0182f7016d6ee1121.zip