summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp57
-rw-r--r--llvm/test/CodeGen/NVPTX/disable-opt.ll12
2 files changed, 48 insertions, 21 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index aa931b134da..04b8f49e55b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -143,14 +143,20 @@ public:
void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
private:
- // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
+ // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
+ // function is only called in opt mode.
void addEarlyCSEOrGVNPass();
+
+ // Add passes that propagate special memory spaces.
+ void addMemorySpaceInferencePasses();
+
+ // Add passes that perform straight-line scalar optimizations.
+ void addStraightLineScalarOptimizationPasses();
};
} // end anonymous namespace
TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
- NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
- return PassConfig;
+ return new NVPTXPassConfig(this, PM);
}
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
@@ -166,22 +172,7 @@ void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
addPass(createEarlyCSEPass());
}
-void NVPTXPassConfig::addIRPasses() {
- // The following passes are known to not play well with virtual regs hanging
- // around after register allocation (which in our case, is *all* registers).
- // We explicitly disable them here. We do, however, need some functionality
- // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
- // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
- disablePass(&PrologEpilogCodeInserterID);
- disablePass(&MachineCopyPropagationID);
- disablePass(&TailDuplicateID);
-
- addPass(createNVVMReflectPass());
- addPass(createNVPTXImageOptimizerPass());
- addPass(createNVPTXAssignValidGlobalNamesPass());
- addPass(createGenericToNVVMPass());
-
- // === Propagate special address spaces ===
+void NVPTXPassConfig::addMemorySpaceInferencePasses() {
addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
// NVPTXLowerKernelArgs emits alloca for byval parameters which can often
// be eliminated by SROA.
@@ -192,8 +183,9 @@ void NVPTXPassConfig::addIRPasses() {
// them unused. We could remove dead code in an ad-hoc manner, but that
// requires manual work and might be error-prone.
addPass(createDeadCodeEliminationPass());
+}
- // === Straight-line scalar optimizations ===
+void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
addPass(createSeparateConstOffsetFromGEPPass());
addPass(createSpeculativeExecutionPass());
// ReassociateGEPs exposes more opportunites for SLSR. See
@@ -208,6 +200,28 @@ void NVPTXPassConfig::addIRPasses() {
// NaryReassociate on GEPs creates redundant common expressions, so run
// EarlyCSE after it.
addPass(createEarlyCSEPass());
+}
+
+void NVPTXPassConfig::addIRPasses() {
+ // The following passes are known to not play well with virtual regs hanging
+ // around after register allocation (which in our case, is *all* registers).
+ // We explicitly disable them here. We do, however, need some functionality
+ // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
+ // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
+ disablePass(&PrologEpilogCodeInserterID);
+ disablePass(&MachineCopyPropagationID);
+ disablePass(&TailDuplicateID);
+
+ addPass(createNVVMReflectPass());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createNVPTXImageOptimizerPass());
+ addPass(createNVPTXAssignValidGlobalNamesPass());
+ addPass(createGenericToNVVMPass());
+
+ if (getOptLevel() != CodeGenOpt::None) {
+ addMemorySpaceInferencePasses();
+ addStraightLineScalarOptimizationPasses();
+ }
// === LSR and other generic IR passes ===
TargetPassConfig::addIRPasses();
@@ -223,7 +237,8 @@ void NVPTXPassConfig::addIRPasses() {
// %1 = shl %a, 2
//
// but EarlyCSE can do neither of them.
- addEarlyCSEOrGVNPass();
+ if (getOptLevel() != CodeGenOpt::None)
+ addEarlyCSEOrGVNPass();
}
bool NVPTXPassConfig::addInstSelector() {
diff --git a/llvm/test/CodeGen/NVPTX/disable-opt.ll b/llvm/test/CodeGen/NVPTX/disable-opt.ll
new file mode 100644
index 00000000000..15e4913c169
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/disable-opt.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O0 | FileCheck %s
+
+define void @foo(i32* %output) {
+; CHECK-LABEL: .visible .func foo(
+entry:
+ %local = alloca i32
+; CHECK: __local_depot
+ store i32 1, i32* %local
+ %0 = load i32, i32* %local
+ store i32 %0, i32* %output
+ ret void
+}
OpenPOWER on IntegriCloud