summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Alexander Timofeev <Alexander.Timofeev@amd.com> 2019-07-02 17:59:44 +0000
committer Alexander Timofeev <Alexander.Timofeev@amd.com> 2019-07-02 17:59:44 +0000
commit 2ce560f029d1c1ba6ce2ad0f4d4ffad78aeaeb1a (patch)
tree ed734d8b07c73178838034362baf02ab553c1530
parent d42f22997e4b74badf62bf7cdbcb74eb091ba5ce (diff)
download bcm5719-llvm-2ce560f029d1c1ba6ce2ad0f4d4ffad78aeaeb1a.tar.gz
bcm5719-llvm-2ce560f029d1c1ba6ce2ad0f4d4ffad78aeaeb1a.zip
[AMDGPU] LCSSA pass added in preISel. Uniform values defined in the divergent loop and used outside
Differential Revision: https://reviews.llvm.org/D63953
Reviewers: rampitec, nhaehnle, arsenm
llvm-svn: 364950
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 18
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 1
-rw-r--r-- llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll 47
3 files changed, 44 insertions, 22 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index c8d4557729b..9b83b5081c8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -39,6 +39,9 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
+#ifdef EXPENSIVE_CHECKS
+#include "llvm/IR/Dominators.h"
+#endif
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
@@ -138,6 +141,10 @@ public:
AU.addRequired<AMDGPUArgumentUsageInfo>();
AU.addRequired<AMDGPUPerfHintAnalysis>();
AU.addRequired<LegacyDivergenceAnalysis>();
+#ifdef EXPENSIVE_CHECKS
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+#endif
SelectionDAGISel::getAnalysisUsage(AU);
}
@@ -351,6 +358,10 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+#ifdef EXPENSIVE_CHECKS
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+#endif
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
@@ -369,6 +380,13 @@ FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
}
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+#ifdef EXPENSIVE_CHECKS
+ DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ for (auto &L : LI->getLoopsInPreorder()) {
+ assert(L->isLCSSAForm(DT));
+ }
+#endif
Subtarget = &MF.getSubtarget<GCNSubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index b10f53cfbc4..8e4f35abfe2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -835,6 +835,7 @@ bool GCNPassConfig::addPreISel() {
if (!LateCFGStructurize) {
addPass(createSIAnnotateControlFlowPass());
}
+ addPass(createLCSSAPass());
return false;
}
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index 01cd11a32d8..c903a04039a 100644
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -17,52 +17,55 @@ define amdgpu_ps void @main(i32, float) {
; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x
; CHECK-NEXT: v_cmp_nlt_f32_e64 s[0:1], 0, v0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT: ; implicit-def: $sgpr8_sgpr9
; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
+; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
; CHECK-NEXT: s_branch BB0_3
-; CHECK-NEXT: BB0_1: ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT: ; implicit-def: $vgpr1
+; CHECK-NEXT: BB0_1: ; %Flow1
+; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_mov_b64 s[8:9], 0
; CHECK-NEXT: BB0_2: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7]
-; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5]
-; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9]
-; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9]
-; CHECK-NEXT: s_cbranch_execz BB0_7
+; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7]
+; CHECK-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5]
+; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
+; CHECK-NEXT: s_and_b64 s[4:5], s[8:9], exec
+; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[10:11]
+; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
+; CHECK-NEXT: s_cbranch_execz BB0_6
; CHECK-NEXT: BB0_3: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1
; CHECK-NEXT: s_and_b64 vcc, exec, vcc
; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
-; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT: s_cbranch_vccz BB0_1
+; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], exec
+; CHECK-NEXT: s_cbranch_vccz BB0_2
; CHECK-NEXT: ; %bb.4: ; %endif1
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_mov_b64 s[6:7], -1
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[0:1]
; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; CHECK-NEXT: ; mask branch BB0_6
+; CHECK-NEXT: ; mask branch BB0_1
+; CHECK-NEXT: s_cbranch_execz BB0_1
; CHECK-NEXT: BB0_5: ; %endif2
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
-; CHECK-NEXT: BB0_6: ; %Flow1
-; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
-; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT: s_branch BB0_2
-; CHECK-NEXT: BB0_7: ; %Flow2
-; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_branch BB0_1
+; CHECK-NEXT: BB0_6: ; %Flow2
+; CHECK-NEXT: s_or_b64 exec, exec, s[10:11]
; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; this is the divergent branch with the condition not marked as divergent
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
-; CHECK-NEXT: ; mask branch BB0_9
-; CHECK-NEXT: BB0_8: ; %if1
+; CHECK-NEXT: ; mask branch BB0_8
+; CHECK-NEXT: BB0_7: ; %if1
; CHECK-NEXT: v_sqrt_f32_e32 v1, v0
-; CHECK-NEXT: BB0_9: ; %endloop
+; CHECK-NEXT: BB0_8: ; %endloop
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm
; CHECK-NEXT: s_endpgm
+; this is the divergent branch with the condition not marked as divergent
start:
%v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
br label %loop
OpenPOWER on IntegriCloud