diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 18 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll | 47 |
3 files changed, 44 insertions, 22 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index c8d4557729b..9b83b5081c8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -39,6 +39,9 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" +#ifdef EXPENSIVE_CHECKS +#include "llvm/IR/Dominators.h" +#endif #include "llvm/IR/Instruction.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Casting.h" @@ -138,6 +141,10 @@ public: AU.addRequired<AMDGPUArgumentUsageInfo>(); AU.addRequired<AMDGPUPerfHintAnalysis>(); AU.addRequired<LegacyDivergenceAnalysis>(); +#ifdef EXPENSIVE_CHECKS + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); +#endif SelectionDAGISel::getAnalysisUsage(AU); } @@ -351,6 +358,10 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel", INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) +#ifdef EXPENSIVE_CHECKS +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +#endif INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) @@ -369,6 +380,13 @@ FunctionPass *llvm::createR600ISelDag(TargetMachine *TM, } bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { +#ifdef EXPENSIVE_CHECKS + DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + for (auto &L : LI->getLoopsInPreorder()) { + assert(L->isLCSSAForm(DT)); + } +#endif Subtarget = &MF.getSubtarget<GCNSubtarget>(); return SelectionDAGISel::runOnMachineFunction(MF); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 
b10f53cfbc4..8e4f35abfe2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -835,6 +835,7 @@ bool GCNPassConfig::addPreISel() { if (!LateCFGStructurize) { addPass(createSIAnnotateControlFlowPass()); } + addPass(createLCSSAPass()); return false; } diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll index 01cd11a32d8..c903a04039a 100644 --- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -17,52 +17,55 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x ; CHECK-NEXT: v_cmp_nlt_f32_e64 s[0:1], 0, v0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3 +; CHECK-NEXT: ; implicit-def: $sgpr8_sgpr9 ; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7 +; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3 ; CHECK-NEXT: s_branch BB0_3 -; CHECK-NEXT: BB0_1: ; in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: ; implicit-def: $vgpr1 +; CHECK-NEXT: BB0_1: ; %Flow1 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_mov_b64 s[8:9], 0 ; CHECK-NEXT: BB0_2: ; %Flow ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7] -; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5] -; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9] -; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9] -; CHECK-NEXT: s_cbranch_execz BB0_7 +; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7] +; CHECK-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5] +; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec +; CHECK-NEXT: s_and_b64 s[4:5], s[8:9], exec +; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] +; CHECK-NEXT: s_mov_b64 s[4:5], s[10:11] +; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11] +; CHECK-NEXT: s_cbranch_execz BB0_6 ; CHECK-NEXT: BB0_3: ; %loop ; CHECK-NEXT: ; =>This Inner Loop 
Header: Depth=1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1 ; CHECK-NEXT: s_and_b64 vcc, exec, vcc ; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec -; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], exec -; CHECK-NEXT: s_cbranch_vccz BB0_1 +; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], exec +; CHECK-NEXT: s_cbranch_vccz BB0_2 ; CHECK-NEXT: ; %bb.4: ; %endif1 ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: s_mov_b64 s[6:7], -1 ; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[0:1] ; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; CHECK-NEXT: ; mask branch BB0_6 +; CHECK-NEXT: ; mask branch BB0_1 +; CHECK-NEXT: s_cbranch_execz BB0_1 ; CHECK-NEXT: BB0_5: ; %endif2 ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: v_add_u32_e32 v1, 1, v1 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1 -; CHECK-NEXT: BB0_6: ; %Flow1 -; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] -; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; CHECK-NEXT: s_branch BB0_2 -; CHECK-NEXT: BB0_7: ; %Flow2 -; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_branch BB0_1 +; CHECK-NEXT: BB0_6: ; %Flow2 +; CHECK-NEXT: s_or_b64 exec, exec, s[10:11] ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; this is the divergent branch with the condition not marked as divergent ; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3] -; CHECK-NEXT: ; mask branch BB0_9 -; CHECK-NEXT: BB0_8: ; %if1 +; CHECK-NEXT: ; mask branch BB0_8 +; CHECK-NEXT: BB0_7: ; %if1 ; CHECK-NEXT: v_sqrt_f32_e32 v1, v0 -; CHECK-NEXT: BB0_9: ; %endloop +; CHECK-NEXT: BB0_8: ; %endloop ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm ; CHECK-NEXT: s_endpgm +; this is the divergent branch with the condition not marked as divergent start: %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0) br label %loop |