diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-01-25 04:25:02 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-01-25 04:25:02 +0000 |
| commit | 9f5e0ef0c549a7374701b496e0bbe93425fb021f (patch) | |
| tree | 1fcc7fc1556cd041f0889768128f9762023d1297 /llvm/test | |
| parent | e7dbebf182b92b49883649b7ca3408892273598b (diff) | |
| download | bcm5719-llvm-9f5e0ef0c549a7374701b496e0bbe93425fb021f.tar.gz bcm5719-llvm-9f5e0ef0c549a7374701b496e0bbe93425fb021f.zip | |
AMDGPU: Implement early ifcvt target hooks.
Leave early ifcvt disabled for now since there are some
shader-db regressions.
This causes some immediate improvements, but could be better.
The cost checking that the pass does is based on critical-path
length for out-of-order CPUs, which is not what we want, so it
skips many cases that we do want to handle.
llvm-svn: 293016
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/early-if-convert-cost.ll | 110 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/early-if-convert.ll | 454 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/uniform-cfg.ll | 4 |
4 files changed, 567 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/early-if-convert-cost.ll b/llvm/test/CodeGen/AMDGPU/early-if-convert-cost.ll new file mode 100644 index 00000000000..d1624f86765 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/early-if-convert-cost.ll @@ -0,0 +1,110 @@ +; RUN: llc -stress-early-ifcvt -amdgpu-early-ifcvt=1 -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; FIXME: Most of these cases that don't trigger because of broken cost +; heuristics. Should not need -stress-early-ifcvt + +; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle64: +; GCN: buffer_load_dwordx2 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}} +; GCN: v_cmp_neq_f64_e32 vcc, 1.0, v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}} +; GCN: v_add_f64 v{{\[}}[[ADD_LO:[0-9]+]]:[[ADD_HI:[0-9]+]]{{\]}}, v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}, v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}} +; GCN-DAG: v_cndmask_b32_e32 v[[RESULT_LO:[0-9]+]], v[[ADD_LO]], v[[VAL_LO]], vcc +; GCN-DAG: v_cndmask_b32_e32 v[[RESULT_HI:[0-9]+]], v[[ADD_HI]], v[[VAL_HI]], vcc +; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} +define void @test_vccnz_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { +entry: + %v = load double, double addrspace(1)* %in + %cc = fcmp oeq double %v, 1.000000e+00 + br i1 %cc, label %if, label %endif + +if: + %u = fadd double %v, %v + br label %endif + +endif: + %r = phi double [ %v, %entry ], [ %u, %if ] + store double %r, double addrspace(1)* %out + ret void +} + +; vcc branch with SGPR inputs +; GCN-LABEL: {{^}}test_vccnz_sgpr_ifcvt_triangle64: +; GCN: v_cmp_neq_f64 +; GCN: v_add_f64 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +define void @test_vccnz_sgpr_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(2)* %in) #0 { +entry: + %v = load double, double addrspace(2)* %in + %cc = fcmp oeq double %v, 1.000000e+00 + br i1 %cc, label %if, label %endif + +if: + %u = fadd double %v, %v + br label %endif + +endif: + %r = phi double [ %v, %entry ], 
[ %u, %if ] + store double %r, double addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle96: +; GCN: v_cmp_neq_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0 + +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: s_mov_b64 vcc, [[CMP]] + +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc + +; GCN-DAG: buffer_store_dword v +; GCN-DAG: buffer_store_dwordx2 +define void @test_vccnz_ifcvt_triangle96(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %in, float %cnd) #0 { +entry: + %v = load <3 x i32>, <3 x i32> addrspace(1)* %in + %cc = fcmp oeq float %cnd, 1.000000e+00 + br i1 %cc, label %if, label %endif + +if: + %u = add <3 x i32> %v, %v + br label %endif + +endif: + %r = phi <3 x i32> [ %v, %entry ], [ %u, %if ] + store <3 x i32> %r, <3 x i32> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle128: +; GCN: v_cmp_neq_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0 + +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: s_mov_b64 vcc, [[CMP]] + +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc + +; GCN: buffer_store_dwordx4 +define void @test_vccnz_ifcvt_triangle128(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in, float %cnd) #0 { +entry: + %v = load <4 x i32>, <4 x i32> addrspace(1)* %in + %cc = fcmp oeq float %cnd, 1.000000e+00 + br i1 %cc, label %if, label %endif + +if: + %u = add <4 x i32> %v, %v + br label %endif + +endif: + %r = phi <4 x i32> [ %v, %entry ], [ %u, %if ] + store <4 x i32> %r, <4 x i32> addrspace(1)* %out + ret void +} diff --git 
a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll new file mode 100644 index 00000000000..5ae1db8c686 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll @@ -0,0 +1,454 @@ +; RUN: llc -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; XUN: llc -march=amdgcn -mcpu=tonga -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; FIXME: This leaves behind a now unnecessary and with exec + +; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle: +; GCN: buffer_load_dword [[VAL:v[0-9]+]] +; GCN: v_cmp_neq_f32_e32 vcc, 1.0, [[VAL]] +; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], [[VAL]], [[VAL]] +; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], [[ADD]], [[VAL]], vcc +; GCN: buffer_store_dword [[RESULT]] +define void @test_vccnz_ifcvt_triangle(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +entry: + %v = load float, float addrspace(1)* %in + %cc = fcmp oeq float %v, 1.000000e+00 + br i1 %cc, label %if, label %endif + +if: + %u = fadd float %v, %v + br label %endif + +endif: + %r = phi float [ %v, %entry ], [ %u, %if ] + store float %r, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}test_vccnz_ifcvt_diamond: +; GCN: buffer_load_dword [[VAL:v[0-9]+]] +; GCN: v_cmp_neq_f32_e32 vcc, 1.0, [[VAL]] +; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[VAL]], [[VAL]] +; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[VAL]], [[VAL]] +; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], [[ADD]], [[MUL]], vcc +; GCN: buffer_store_dword [[RESULT]] +define void @test_vccnz_ifcvt_diamond(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +entry: + %v = load float, float addrspace(1)* %in + %cc = fcmp oeq float %v, 1.000000e+00 + br i1 %cc, label %if, label %else + +if: + %u0 = fadd float %v, %v + br label %endif + +else: + %u1 = fmul float %v, %v + br label %endif + +endif: + %r = phi float [ %u0, %if ], [ %u1, %else ] + store float %r, float addrspace(1)* %out + 
ret void +} + +; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_vcc_clobber: +; GCN: ; clobber vcc +; GCN: v_cmp_neq_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0 +; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc +; GCN: s_mov_b64 vcc, [[CMP]] +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc +define void @test_vccnz_ifcvt_triangle_vcc_clobber(i32 addrspace(1)* %out, i32 addrspace(1)* %in, float %k) #0 { +entry: + %v = load i32, i32 addrspace(1)* %in + %cc = fcmp oeq float %k, 1.000000e+00 + br i1 %cc, label %if, label %endif + +if: + call void asm "; clobber $0", "~{VCC}"() #0 + %u = add i32 %v, %v + br label %endif + +endif: + %r = phi i32 [ %v, %entry ], [ %u, %if ] + store i32 %r, i32 addrspace(1)* %out + ret void +} + +; Longest chain of cheap instructions to convert +; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_max_cheap: +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_cndmask_b32_e32 +define void @test_vccnz_ifcvt_triangle_max_cheap(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +entry: + %v = load float, float addrspace(1)* %in + %cc = fcmp oeq float %v, 1.000000e+00 + br i1 %cc, label %if, label %endif + +if: + %u.0 = fmul float %v, %v + %u.1 = fmul float %v, %u.0 + %u.2 = fmul float %v, %u.1 + %u.3 = fmul float %v, %u.2 + %u.4 = fmul float %v, %u.3 + %u.5 = fmul float %v, %u.4 + %u.6 = fmul float %v, %u.5 + %u.7 = fmul float %v, %u.6 + %u.8 = fmul float %v, %u.7 + br label %endif + +endif: + %r = phi float [ %v, %entry ], [ %u.8, %if ] + store float %r, float addrspace(1)* %out + ret void +} + +; Short chain of cheap instructions to not convert +; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_min_expensive: +; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]] + +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; 
GCN: v_mul_f32 + +; GCN: [[ENDIF]]: +; GCN: buffer_store_dword +define void @test_vccnz_ifcvt_triangle_min_expensive(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +entry: + %v = load float, float addrspace(1)* %in + %cc = fcmp oeq float %v, 1.000000e+00 + br i1 %cc, label %if, label %endif + +if: + %u.0 = fmul float %v, %v + %u.1 = fmul float %v, %u.0 + %u.2 = fmul float %v, %u.1 + %u.3 = fmul float %v, %u.2 + %u.4 = fmul float %v, %u.3 + %u.5 = fmul float %v, %u.4 + %u.6 = fmul float %v, %u.5 + %u.7 = fmul float %v, %u.6 + %u.8 = fmul float %v, %u.7 + %u.9 = fmul float %v, %u.8 + br label %endif + +endif: + %r = phi float [ %v, %entry ], [ %u.9, %if ] + store float %r, float addrspace(1)* %out + ret void +} + +; Should still branch over fdiv expansion +; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_expensive: +; GCN: v_cmp_neq_f32_e32 +; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]] + +; GCN: v_div_scale_f32 + +; GCN: [[ENDIF]]: +; GCN: buffer_store_dword +define void @test_vccnz_ifcvt_triangle_expensive(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +entry: + %v = load float, float addrspace(1)* %in + %cc = fcmp oeq float %v, 1.000000e+00 + br i1 %cc, label %if, label %endif + +if: + %u = fdiv float %v, %v + br label %endif + +endif: + %r = phi float [ %v, %entry ], [ %u, %if ] + store float %r, float addrspace(1)* %out + ret void +} + +; vcc branch with SGPR inputs +; GCN-LABEL: {{^}}test_vccnz_sgpr_ifcvt_triangle: +; GCN: v_cmp_neq_f32_e64 +; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]] + +; GCN: s_add_i32 + +; GCN: [[ENDIF]]: +; GCN: buffer_store_dword +define void @test_vccnz_sgpr_ifcvt_triangle(i32 addrspace(1)* %out, i32 addrspace(2)* %in, float %cnd) #0 { +entry: + %v = load i32, i32 addrspace(2)* %in + %cc = fcmp oeq float %cnd, 1.000000e+00 + br i1 %cc, label %if, label %endif + +if: + %u = add i32 %v, %v + br label %endif + +endif: + %r = phi i32 [ %v, %entry ], [ %u, %if ] + store i32 %r, i32 addrspace(1)* %out + ret void + +} + +; 
GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_constant_load: +; GCN: v_cndmask_b32 +define void @test_vccnz_ifcvt_triangle_constant_load(float addrspace(1)* %out, float addrspace(2)* %in) #0 { +entry: + %v = load float, float addrspace(2)* %in + %cc = fcmp oeq float %v, 1.000000e+00 + br i1 %cc, label %if, label %endif + +if: + %u = fadd float %v, %v + br label %endif + +endif: + %r = phi float [ %v, %entry ], [ %u, %if ] + store float %r, float addrspace(1)* %out + ret void +} + +; Due to broken cost heuristic, this is not if converted like +; test_vccnz_ifcvt_triangle_constant_load even though it should be. + +; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_argload: +; GCN: v_cndmask_b32 +define void @test_vccnz_ifcvt_triangle_argload(float addrspace(1)* %out, float %v) #0 { +entry: + %cc = fcmp oeq float %v, 1.000000e+00 + br i1 %cc, label %if, label %endif + +if: + %u = fadd float %v, %v + br label %endif + +endif: + %r = phi float [ %v, %entry ], [ %u, %if ] + store float %r, float addrspace(1)* %out + ret void +} + +; Scalar branch and scalar inputs +; GCN-LABEL: {{^}}test_scc1_sgpr_ifcvt_triangle: +; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x0 +; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], [[VAL]] +; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1 +; GCN-NEXT: s_cselect_b32 [[SELECT:s[0-9]+]], [[ADD]], [[VAL]] +define void @test_scc1_sgpr_ifcvt_triangle(i32 addrspace(2)* %in, i32 %cond) #0 { +entry: + %v = load i32, i32 addrspace(2)* %in + %cc = icmp eq i32 %cond, 1 + br i1 %cc, label %if, label %endif + +if: + %u = add i32 %v, %v + br label %endif + +endif: + %r = phi i32 [ %v, %entry ], [ %u, %if ] + call void asm sideeffect "; reg use $0", "s"(i32 %r) #0 + ret void +} + +; FIXME: Should be able to use VALU compare and select +; Scalar branch but VGPR select operands +; GCN-LABEL: {{^}}test_scc1_vgpr_ifcvt_triangle: +; GCN: s_cmp_lg_u32 +; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]] + +; GCN: v_add_f32_e32 + +; GCN: [[ENDIF]]: +; GCN: buffer_store_dword +define 
void @test_scc1_vgpr_ifcvt_triangle(float addrspace(1)* %out, float addrspace(1)* %in, i32 %cond) #0 { +entry: + %v = load float, float addrspace(1)* %in + %cc = icmp eq i32 %cond, 1 + br i1 %cc, label %if, label %endif + +if: + %u = fadd float %v, %v + br label %endif + +endif: + %r = phi float [ %v, %entry ], [ %u, %if ] + store float %r, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}test_scc1_sgpr_ifcvt_triangle64: +; GCN: s_add_u32 +; GCN: s_addc_u32 +; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1 +; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +define void @test_scc1_sgpr_ifcvt_triangle64(i64 addrspace(2)* %in, i32 %cond) #0 { +entry: + %v = load i64, i64 addrspace(2)* %in + %cc = icmp eq i32 %cond, 1 + br i1 %cc, label %if, label %endif + +if: + %u = add i64 %v, %v + br label %endif + +endif: + %r = phi i64 [ %v, %entry ], [ %u, %if ] + call void asm sideeffect "; reg use $0", "s"(i64 %r) #0 + ret void +} + +; TODO: Can do s_cselect_b64; s_cselect_b32 +; GCN-LABEL: {{^}}test_scc1_sgpr_ifcvt_triangle96: +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1 +; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +define void @test_scc1_sgpr_ifcvt_triangle96(<3 x i32> addrspace(2)* %in, i32 %cond) #0 { +entry: + %v = load <3 x i32>, <3 x i32> addrspace(2)* %in + %cc = icmp eq i32 %cond, 1 + br i1 %cc, label %if, label %endif + +if: + %u = add <3 x i32> %v, %v + br label %endif + +endif: + %r = phi <3 x i32> [ %v, %entry ], [ %u, %if ] + %r.ext = shufflevector <3 x i32> %r, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + call void asm sideeffect "; reg use $0", "s"(<4 x i32> %r.ext) #0 + ret void +} + +; GCN-LABEL: {{^}}test_scc1_sgpr_ifcvt_triangle128: +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; 
GCN: s_add_i32 +; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1 +; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +define void @test_scc1_sgpr_ifcvt_triangle128(<4 x i32> addrspace(2)* %in, i32 %cond) #0 { +entry: + %v = load <4 x i32>, <4 x i32> addrspace(2)* %in + %cc = icmp eq i32 %cond, 1 + br i1 %cc, label %if, label %endif + +if: + %u = add <4 x i32> %v, %v + br label %endif + +endif: + %r = phi <4 x i32> [ %v, %entry ], [ %u, %if ] + call void asm sideeffect "; reg use $0", "s"(<4 x i32> %r) #0 + ret void +} + +; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc_constant_select: +; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 +; GCN: s_cselect_b32 s{{[0-9]+}}, 1, 0{{$}} +define void @uniform_if_swap_br_targets_scc_constant_select(i32 %cond, i32 addrspace(1)* %out) { +entry: + %cmp0 = icmp eq i32 %cond, 0 + br i1 %cmp0, label %else, label %if + +if: + br label %done + +else: + br label %done + +done: + %value = phi i32 [0, %if], [1, %else] + store i32 %value, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}ifcvt_undef_scc: +; GCN: {{^}}; BB#0: +; GCN-NEXT: s_load_dwordx2 +; GCN-NEXT: s_cselect_b32 s{{[0-9]+}}, 1, 0 +define void @ifcvt_undef_scc(i32 %cond, i32 addrspace(1)* %out) { +entry: + br i1 undef, label %else, label %if + +if: + br label %done + +else: + br label %done + +done: + %value = phi i32 [0, %if], [1, %else] + store i32 %value, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle256: +; GCN: v_cmp_neq_f32 +; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]] + +; GCN: v_add_i32 +; GCN: v_add_i32 + +; GCN: [[ENDIF]]: +; GCN: buffer_store_dword +define void @test_vccnz_ifcvt_triangle256(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in, float %cnd) #0 { +entry: + %v = load <8 x i32>, <8 x i32> addrspace(1)* %in + %cc = fcmp oeq float %cnd, 1.000000e+00 + br i1 %cc, label %if, label 
%endif + +if: + %u = add <8 x i32> %v, %v + br label %endif + +endif: + %r = phi <8 x i32> [ %v, %entry ], [ %u, %if ] + store <8 x i32> %r, <8 x i32> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle512: +; GCN: v_cmp_neq_f32 +; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]] + +; GCN: v_add_i32 +; GCN: v_add_i32 + +; GCN: [[ENDIF]]: +; GCN: buffer_store_dword +define void @test_vccnz_ifcvt_triangle512(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in, float %cnd) #0 { +entry: + %v = load <16 x i32>, <16 x i32> addrspace(1)* %in + %cc = fcmp oeq float %cnd, 1.000000e+00 + br i1 %cc, label %if, label %endif + +if: + %u = add <16 x i32> %v, %v + br label %endif + +endif: + %r = phi <16 x i32> [ %v, %entry ], [ %u, %if ] + store <16 x i32> %r, <16 x i32> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll b/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll index 93a2c6998be..eb6007f21c1 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll @@ -14,6 +14,7 @@ main_body: if: %u = fadd float %v, %v + call void asm sideeffect "", ""() #0 ; Prevent ifconversion br label %else else: diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll index 154ac361e79..d3e431d1e35 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}uniform_if_scc: ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0 |

