diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-09-29 01:44:16 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-09-29 01:44:16 +0000 |
commit | e6740754f083f7d85e5a8afcc959c8d5b13b32c6 (patch) | |
tree | 6c1aff82a41ecb07f3ace675912f966aa4a68fd3 /llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | |
parent | ae689e3498d9ad5fe9aa7d759a2f8cf9f017563c (diff) | |
download | bcm5719-llvm-e6740754f083f7d85e5a8afcc959c8d5b13b32c6.tar.gz bcm5719-llvm-e6740754f083f7d85e5a8afcc959c8d5b13b32c6.zip |
AMDGPU: Partially fix control flow at -O0
Fixes to allow spilling all registers at the end of the block
to work with exec modifications. Don't emit s_and_saveexec_b64 for
"if" lowering, and instead emit copies. Mark control flow mask
instructions as terminators to get correct spill code placement
with fast regalloc, and then have a separate optimization pass
form the saveexec.
This should work if SGPRs are spilled to VGPRs, but
will likely fail in the case that an SGPR spills to memory
and no workitem takes a divergent branch.
llvm-svn: 282667
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 21 |
1 files changed, 16 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 2a9f76212b9..aa2ebbe2852 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -83,6 +83,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() { initializeSILowerControlFlowPass(*PR); initializeSIInsertSkipsPass(*PR); initializeSIDebuggerInsertNopsPass(*PR); + initializeSIOptimizeExecMaskingPass(*PR); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { @@ -333,6 +334,7 @@ public: void addFastRegAlloc(FunctionPass *RegAllocPass) override; void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; void addPreRegAlloc() override; + void addPostRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; }; @@ -548,7 +550,6 @@ bool GCNPassConfig::addGlobalInstructionSelect() { #endif void GCNPassConfig::addPreRegAlloc() { - addPass(createSIShrinkInstructionsPass()); addPass(createSIWholeQuadModePass()); } @@ -556,7 +557,11 @@ void GCNPassConfig::addPreRegAlloc() { void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { // FIXME: We have to disable the verifier here because of PHIElimination + // TwoAddressInstructions disabling it. - insertPass(&TwoAddressInstructionPassID, &SILowerControlFlowID, false); + + // This must be run immediately after phi elimination and before + // TwoAddressInstructions, otherwise the processing of the tied operand of + // SI_ELSE will introduce a copy of the tied operand source after the else. + insertPass(&PHIEliminationID, &SILowerControlFlowID, false); TargetPassConfig::addFastRegAlloc(RegAllocPass); } @@ -566,13 +571,19 @@ void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // passes might recompute live intervals. 
insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID); - // TODO: It might be better to run this right after phi elimination, but for - // now that would require not running the verifier. - insertPass(&RenameIndependentSubregsID, &SILowerControlFlowID); + // This must be run immediately after phi elimination and before + // TwoAddressInstructions, otherwise the processing of the tied operand of + // SI_ELSE will introduce a copy of the tied operand source after the else. + insertPass(&PHIEliminationID, &SILowerControlFlowID, false); TargetPassConfig::addOptimizedRegAlloc(RegAllocPass); } +void GCNPassConfig::addPostRegAlloc() { + addPass(&SIOptimizeExecMaskingID); + TargetPassConfig::addPostRegAlloc(); +} + void GCNPassConfig::addPreSched2() { } |