summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
diff options
context:
space:
mode:
authorAlexander Timofeev <Alexander.Timofeev@amd.com>2016-12-08 17:28:47 +0000
committerAlexander Timofeev <Alexander.Timofeev@amd.com>2016-12-08 17:28:47 +0000
commit18009560c59deceb2be577e0182f7016d6ee1121 (patch)
tree05c51c8ad3b3de4f579ea884037cded642bec1b2 /llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
parenteebed6229ad000a34959f4a9794b9e009eb0227d (diff)
downloadbcm5719-llvm-18009560c59deceb2be577e0182f7016d6ee1121.tar.gz
bcm5719-llvm-18009560c59deceb2be577e0182f7016d6ee1121.zip
[AMDGPU] Scalarization of global uniform loads.
Summary: LC can currently select a scalar load for a uniform memory access based only on the read-only memory address space. This restriction originated from the fact that in HW prior to VI the vector and scalar caches are not coherent. With MemoryDependenceAnalysis we can check that the memory location corresponding to the memory operand of the LOAD is not clobbered along all paths from the function entry. Reviewers: rampitec, tstellarAMD, arsenm Subscribers: wdng, arsenm, nhaehnle Differential Revision: https://reviews.llvm.org/D26917 llvm-svn: 289076
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp10
1 files changed, 10 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e1fd95d0917..a62975cde27 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -61,6 +61,14 @@ static cl::opt<bool> EnableLoadStoreVectorizer(
cl::init(true),
cl::Hidden);
+// Option to control global loads scalarization
+static cl::opt<bool> ScalarizeGlobal(
+ "amdgpu-scalarize-global-loads",
+ cl::desc("Enable global load scalarization"),
+ cl::init(false),
+ cl::Hidden);
+
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -262,6 +270,8 @@ const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
I->setGISelAccessor(*GISel);
}
+ I->setScalarizeGlobalBehavior(ScalarizeGlobal);
+
return I.get();
}
OpenPOWER on IntegriCloud