diff options
| author | Wei Ding <wei.ding2@amd.com> | 2017-04-12 23:51:47 +0000 |
|---|---|---|
| committer | Wei Ding <wei.ding2@amd.com> | 2017-04-12 23:51:47 +0000 |
| commit | 74da350b850efd10643ce9b76043e0e9df59a1b2 (patch) | |
| tree | d8a57f05977760ce3d1f9b06b597a14bde6f2552 /llvm/test/CodeGen | |
| parent | a13714ea5f8e95c5969271b74d490f0dd829cc71 (diff) | |
| download | bcm5719-llvm-74da350b850efd10643ce9b76043e0e9df59a1b2.tar.gz bcm5719-llvm-74da350b850efd10643ce9b76043e0e9df59a1b2.zip | |
AMDGPU: Fix the issue where the common dominator of two incoming blocks terminates with a uniform branch.
Differential Revision: http://reviews.llvm.org/D31350
llvm-svn: 300142
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/loop_break.ll | 5 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/sgprcopies.ll | 58 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/valu-i1.ll | 4 |
4 files changed, 65 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll index 492472155ee..b9df2cb779a 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_break.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll @@ -27,8 +27,9 @@ ; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1 ; GCN: s_or_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INITMASK]] -; GCN: s_cmp_gt_i32 s{{[0-9]+}}, -1 -; GCN-NEXT: s_cbranch_scc1 [[FLOW:BB[0-9]+_[0-9]+]] +; GCN: v_cmp_lt_i32_e32 vcc, -1 +; GCN: s_and_b64 vcc, exec, vcc +; GCN-NEXT: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]] ; GCN: ; BB#2: ; %bb4 ; GCN: buffer_load_dword diff --git a/llvm/test/CodeGen/AMDGPU/sgprcopies.ll b/llvm/test/CodeGen/AMDGPU/sgprcopies.ll new file mode 100644 index 00000000000..68cd83bb6cf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sgprcopies.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}checkTwoBlocksWithUniformBranch +; GCN: BB0_2 +; GCN: v_add +define amdgpu_kernel void @checkTwoBlocksWithUniformBranch(i32 addrspace(1)* nocapture %out, i32 %width, float %xPos, float %yPos, float %xStep, float %yStep, i32 %maxIter) { +entry: + %conv = call i32 @llvm.amdgcn.workitem.id.x() #1 + %rem = urem i32 %conv, %width + %div = udiv i32 %conv, %width + %conv1 = sitofp i32 %rem to float + %x = tail call float @llvm.fmuladd.f32(float %xStep, float %conv1, float %xPos) + %conv2 = sitofp i32 %div to float + %y = tail call float @llvm.fmuladd.f32(float %yStep, float %conv2, float %yPos) + %yy = fmul float %y, %y + %xy = tail call float @llvm.fmuladd.f32(float %x, float %x, float %yy) + %cmp01 = fcmp ole float %xy, 4.000000e+00 + %cmp02 = icmp ne i32 %maxIter, 0 + %cond01 = and i1 %cmp02, %cmp01 + br i1 %cond01, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %x_val = phi float [ %call8, %for.body ], [ %x, %for.body.preheader ] + %iter_val 
= phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %y_val = phi float [ %call9, %for.body ], [ %y, %for.body.preheader ] + %sub = fsub float -0.000000e+00, %y_val + %call7 = tail call float @llvm.fmuladd.f32(float %x_val, float %x_val, float %x) #1 + %call8 = tail call float @llvm.fmuladd.f32(float %sub, float %y_val, float %call7) #1 + %mul = fmul float %x_val, 2.000000e+00 + %call9 = tail call float @llvm.fmuladd.f32(float %mul, float %y_val, float %y) #1 + %inc = add nuw i32 %iter_val, 1 + %mul3 = fmul float %call9, %call9 + %0 = tail call float @llvm.fmuladd.f32(float %call8, float %call8, float %mul3) + %cmp = fcmp ole float %0, 4.000000e+00 + %cmp5 = icmp ult i32 %inc, %maxIter + %or.cond = and i1 %cmp5, %cmp + br i1 %or.cond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %iter.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %for.end.loopexit ] + %idxprom = ashr exact i32 %conv, 32 + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %idxprom + store i32 %iter.0.lcssa, i32 addrspace(1)* %arrayidx, align 4 + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare float @llvm.fmuladd.f32(float, float, float) #1 + +attributes #0 = { nounwind readnone } +attributes #1 = { readnone } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll index e0067f9f45a..8a08f9d8bb0 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll @@ -7,11 +7,11 @@ ; CHECK: s_and_saveexec_b64 ; CHECK-NEXT: s_xor_b64 ; CHECK-NEXT: ; mask branch - +; CHECK-NEXT: s_cbranch_execz BB{{[0-9]+_[0-9]+}} ; CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %loop_body.preheader ; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]: -; CHECK: s_cbranch_scc0 [[LOOP_BODY_LABEL]] +; CHECK: 
s_cbranch_vccz [[LOOP_BODY_LABEL]] ; CHECK: s_endpgm define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <2 x i32> %addr.base, i32 %y, i32 %p) { diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll index aad260c3e36..85a8929ebe5 100644 --- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll @@ -172,8 +172,8 @@ exit: ; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]: ; SI: buffer_load_dword ; SI-DAG: buffer_store_dword -; SI-DAG: s_cmpk_eq_i32 s{{[0-9]+}}, 0x100 -; SI: s_cbranch_scc0 [[LABEL_LOOP]] +; SI-DAG: v_cmp_eq_u32_e32 vcc, 0x100 +; SI: s_cbranch_vccz [[LABEL_LOOP]] ; SI: [[LABEL_EXIT]]: ; SI: s_endpgm |

