diff options
3 files changed, 100 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index 4e0cc736bad..f88e3b0dac8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -117,14 +117,8 @@ bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {  }  void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { -  if (I.isUnconditional()) -    return; - -  Value *Cond = I.getCondition(); -  if (!DA->isUniform(Cond)) -    return; - -  setUniformMetadata(I.getParent()->getTerminator()); +  if (DA->isUniform(&I)) +    setUniformMetadata(I.getParent()->getTerminator());  }  void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp index 90f430d5ca4..98e9ea66232 100644 --- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -155,7 +155,7 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) {  /// Is the branch condition uniform or did the StructurizeCFG pass  /// consider it as such?  bool SIAnnotateControlFlow::isUniform(BranchInst *T) { -  return DA->isUniform(T->getCondition()) || +  return DA->isUniform(T) ||           T->getMetadata("structurizecfg.uniform") != nullptr;  } diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll new file mode 100644 index 00000000000..8d21050ebee --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s + +; This module creates a divergent branch. The branch is marked as divergent by +; the divergence analysis but the condition is not. This test ensures that the +; divergence of the branch is tested, not its condition, so that branch is +; correctly emitted as divergent. + +target triple = "amdgcn-mesa-mesa3d" + +define amdgpu_ps void @main(i32, float) { +; CHECK-LABEL: main: +; CHECK:       ; %bb.0: ; %start +; CHECK-NEXT:    v_readfirstlane_b32 s0, v0 +; CHECK-NEXT:    s_mov_b32 m0, s0 +; CHECK-NEXT:    s_mov_b64 s[4:5], 0 +; CHECK-NEXT:    v_interp_p1_f32_e32 v0, v1, attr0.x +; CHECK-NEXT:    v_cmp_nlt_f32_e64 s[0:1], 0, v0 +; CHECK-NEXT:    v_mov_b32_e32 v1, 0 +; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3 +; CHECK-NEXT:    ; implicit-def: $sgpr6_sgpr7 +; CHECK-NEXT:  BB0_1: ; %loop +; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 32, v1 +; CHECK-NEXT:    s_and_b64 vcc, exec, vcc +; CHECK-NEXT:    s_or_b64 s[6:7], s[6:7], exec +; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], exec +; CHECK-NEXT:    s_cbranch_vccz BB0_5 +; CHECK-NEXT:  ; %bb.2: ; %endif1 +; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT:    s_mov_b64 s[6:7], -1 +; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[0:1] +; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[8:9] +; CHECK-NEXT:    ; mask branch BB0_4 +; CHECK-NEXT:  BB0_3: ; %endif2 +; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1 +; CHECK-NEXT:    s_xor_b64 s[6:7], exec, -1 +; CHECK-NEXT:  BB0_4: ; %Flow1 +; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec +; CHECK-NEXT:    s_branch BB0_6 +; CHECK-NEXT:  BB0_5: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT:    ; implicit-def: $vgpr1 +; CHECK-NEXT:  BB0_6: ; %Flow +; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[6:7] +; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], s[4:5] +; CHECK-NEXT:    s_mov_b64 s[4:5], s[8:9] +; CHECK-NEXT:    s_andn2_b64 exec, exec, s[8:9] +; CHECK-NEXT:    s_cbranch_execnz BB0_1 +; CHECK-NEXT:  ; %bb.7: ; %Flow2 +; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT:    v_mov_b32_e32 v1, 0 +; this is the divergent branch with the condition not marked as divergent +; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], s[2:3] +; CHECK-NEXT:    ; mask branch BB0_9 +; CHECK-NEXT:  BB0_8: ; %if1 +; CHECK-NEXT:    v_sqrt_f32_e32 v1, v0 +; CHECK-NEXT:  BB0_9: ; %endloop +; CHECK-NEXT:    s_or_b64 exec, exec, s[0:1] +; CHECK-NEXT:    exp mrt0 v1, v1, v1, v1 done vm +; CHECK-NEXT:    s_endpgm +start: +  %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0) +  br label %loop + +loop: +  %v1 = phi i32 [ 0, %start ], [ %v5, %endif2 ] +  %v2 = icmp ugt i32 %v1, 31 +  br i1 %v2, label %if1, label %endif1 + +if1: +  %v3 = call float @llvm.sqrt.f32(float %v0) +  br label %endloop + +endif1: +  %v4 = fcmp ogt float %v0, 0.000000e+00 +  br i1 %v4, label %endloop, label %endif2 + +endif2: +  %v5 = add i32 %v1, 1 +  br label %loop + +endloop: +  %v6 = phi float [ %v3, %if1 ], [ 0.0, %endif1 ] +  call void @llvm.amdgcn.exp.v4f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true) +  ret void +} + +declare float @llvm.sqrt.f32(float) #1 +declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 +declare void @llvm.amdgcn.exp.v4f32(i32, i32, float, float, float, float, i1, i1) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone }  | 

