summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-09-29 01:44:16 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-09-29 01:44:16 +0000
commite6740754f083f7d85e5a8afcc959c8d5b13b32c6 (patch)
tree6c1aff82a41ecb07f3ace675912f966aa4a68fd3 /llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
parentae689e3498d9ad5fe9aa7d759a2f8cf9f017563c (diff)
downloadbcm5719-llvm-e6740754f083f7d85e5a8afcc959c8d5b13b32c6.tar.gz
bcm5719-llvm-e6740754f083f7d85e5a8afcc959c8d5b13b32c6.zip
AMDGPU: Partially fix control flow at -O0
Fixes to allow spilling all registers at the end of the block work with exec modifications. Don't emit s_and_saveexec_b64 for if lowering, and instead emit copies. Mark control flow mask instructions as terminators to get correct spill code placement with fast regalloc, and then have a separate optimization pass form the saveexec. This should work if SGPRs are spilled to VGPRs, but will likely fail in the case that an SGPR spills to memory and no workitem takes a divergent branch. llvm-svn: 282667
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp21
1 files changed, 16 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 2a9f76212b9..aa2ebbe2852 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -83,6 +83,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeSILowerControlFlowPass(*PR);
initializeSIInsertSkipsPass(*PR);
initializeSIDebuggerInsertNopsPass(*PR);
+ initializeSIOptimizeExecMaskingPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -333,6 +334,7 @@ public:
void addFastRegAlloc(FunctionPass *RegAllocPass) override;
void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
void addPreRegAlloc() override;
+ void addPostRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
};
@@ -548,7 +550,6 @@ bool GCNPassConfig::addGlobalInstructionSelect() {
#endif
void GCNPassConfig::addPreRegAlloc() {
-
addPass(createSIShrinkInstructionsPass());
addPass(createSIWholeQuadModePass());
}
@@ -556,7 +557,11 @@ void GCNPassConfig::addPreRegAlloc() {
void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
// FIXME: We have to disable the verifier here because of PHIElimination +
// TwoAddressInstructions disabling it.
- insertPass(&TwoAddressInstructionPassID, &SILowerControlFlowID, false);
+
+ // This must be run immediately after phi elimination and before
+ // TwoAddressInstructions, otherwise the processing of the tied operand of
+ // SI_ELSE will introduce a copy of the tied operand source after the else.
+ insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
TargetPassConfig::addFastRegAlloc(RegAllocPass);
}
@@ -566,13 +571,19 @@ void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
// passes might recompute live intervals.
insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
- // TODO: It might be better to run this right after phi elimination, but for
- // now that would require not running the verifier.
- insertPass(&RenameIndependentSubregsID, &SILowerControlFlowID);
+ // This must be run immediately after phi elimination and before
+ // TwoAddressInstructions, otherwise the processing of the tied operand of
+ // SI_ELSE will introduce a copy of the tied operand source after the else.
+ insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}
+void GCNPassConfig::addPostRegAlloc() {
+ addPass(&SIOptimizeExecMaskingID);
+ TargetPassConfig::addPostRegAlloc();
+}
+
void GCNPassConfig::addPreSched2() {
}
OpenPOWER on IntegriCloud