diff options
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/add_i1.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll | 30 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll | 38 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/inline-asm.ll | 9 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll | 30 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/loop_break.ll | 33 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll | 47 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/multilevel-break.ll | 94 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/select-opt.ll | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll | 30 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll | 49 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/sub_i1.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/valu-i1.ll | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll | 2 |
15 files changed, 264 insertions, 141 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/add_i1.ll b/llvm/test/CodeGen/AMDGPU/add_i1.ll index fb3b69ca3bd..c5f7e3af5e3 100644 --- a/llvm/test/CodeGen/AMDGPU/add_i1.ll +++ b/llvm/test/CodeGen/AMDGPU/add_i1.ll @@ -21,8 +21,8 @@ define amdgpu_kernel void @add_var_imm_i1(i1 addrspace(1)* %out, i1 addrspace(1) } ; GCN-LABEL: {{^}}add_i1_cf: -; GCN: v_cmp_ne_u32_e32 vcc, 0, {{v[0-9]+}} -; GCN-NEXT: s_not_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc +; GCN: ; %endif +; GCN: s_not_b64 define amdgpu_kernel void @add_i1_cf(i1 addrspace(1)* %out, i1 addrspace(1)* %a, i1 addrspace(1)* %b) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll index 84a2d3d3a7b..ae78a1ecf32 100644 --- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll @@ -1,19 +1,25 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}i1_copy_from_loop: ; -; Cannot use an SGPR mask to copy %cc out of the loop, since the mask would -; only contain the lanes that were active during the last loop iteration. 
-; ; SI: ; %for.body -; SI: v_cmp_gt_u32_e64 [[SREG:s\[[0-9]+:[0-9]+\]]], 4, -; SI: v_cndmask_b32_e64 [[VREG:v[0-9]+]], 0, -1, [[SREG]] -; SI-NEXT: s_cbranch_vccnz [[ENDIF:BB[0-9_]+]] -; SI: [[ENDIF]]: -; SI-NOT: [[VREG]] -; SI: ; %for.end -; SI: v_cmp_ne_u32_e32 vcc, 0, [[VREG]] +; SI: v_cmp_gt_u32_e64 [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4, +; SI-DAG: s_andn2_b64 [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec +; SI-DAG: s_and_b64 [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec +; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]] + +; SI: ; %Flow1 +; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec + +; SI: ; %Flow +; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec +; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec +; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]] + +; SI: ; %for.end +; SI: s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[LCSSA_ACCUM]] + define amdgpu_ps void @i1_copy_from_loop(<4 x i32> inreg %rsrc, i32 %tid) { entry: br label %for.body diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll new file mode 100644 index 00000000000..0aacbbfda18 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll @@ -0,0 +1,38 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}test_dont_clobber_scc: + +; GCN: ; %entry +; GCN: s_cmp_eq_u32 s0, 0 +; GCN: s_cbranch_scc1 [[PREEXIT:BB[0-9_]+]] + +; GCN: ; %blocka +; GCN: s_xor_b64 s[{{[0-9:]+}}], exec, -1 +; GCN: s_cmp_eq_u32 s1, 0 +; GCN: s_cbranch_scc1 [[EXIT:BB[0-9_]+]] + +; GCN: [[PREEXIT]]: +; GCN: [[EXIT]]: + +define amdgpu_vs float @test_dont_clobber_scc(i32 inreg %uni, i32 inreg %uni2) #0 { +entry: + %cc.uni = icmp eq i32 %uni, 0 + br i1 %cc.uni, label %exit, label %blocka + +blocka: + call void asm sideeffect "; dummy a", ""() + %cc.uni2 = icmp eq i32 %uni2, 0 + br i1 %cc.uni2, label %exit, label 
%blockb + +blockb: + call void asm sideeffect "; dummy b", ""() + br label %exit + +exit: + %cc.phi = phi i1 [ true, %entry ], [ false, %blocka ], [ false, %blockb ] + call void asm sideeffect "; dummy exit", ""() + %r = select i1 %cc.phi, float 1.0, float 2.0 + ret float %r +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll index 63a9f1feb6d..5b25271ce17 100644 --- a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll +++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll @@ -2,12 +2,16 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}br_i1_phi: -; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} -; SI: s_and_saveexec_b64 -; SI: v_mov_b32_e32 [[REG]], -1{{$}} -; SI: v_cmp_ne_u32_e32 vcc, 0, [[REG]] -; SI: s_and_saveexec_b64 -; SI: s_endpgm + +; SI: ; %bb +; SI: s_mov_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], 0 + +; SI: ; %bb2 +; SI: s_mov_b64 [[TMP]], exec + +; SI: ; %bb3 +; SI: s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]] + define amdgpu_kernel void @br_i1_phi(i32 %arg) { bb: %tidig = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.ll index a0563cdd319..9615efaaa93 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.ll @@ -198,7 +198,8 @@ entry: } ; CHECK-LABEL: {{^}}i1_imm_input_phys_vgpr: -; CHECK: v_mov_b32_e32 v0, -1{{$}} +; CHECK: s_mov_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], -1 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, [[MASK]] ; CHECK: ; use v0 define amdgpu_kernel void @i1_imm_input_phys_vgpr() { entry: @@ -212,10 +213,14 @@ entry: ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, [[LOAD]] ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK: ; use v0 +; CHECK: v_cmp_ne_u32_e32 vcc, 0, v1 +; CHECK: v_cndmask_b32_e64 [[STORE:v[0-9]+]], 0, 1, vcc +; CHECK: {{buffer|flat}}_store_byte [[STORE]], define amdgpu_kernel void 
@i1_input_phys_vgpr() { entry: %val = load i1, i1 addrspace(1)* undef - call void asm sideeffect "; use $0 ", "{v0}"(i1 %val) + %cc = call i1 asm sideeffect "; use $1, def $0 ", "={v1}, {v0}"(i1 %val) + store i1 %cc, i1 addrspace(1)* undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll index 34b842d8436..63c1556212d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SI %s -; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s ; FIXME: Enable for VI. 
@@ -144,20 +144,24 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace } ; GCN-LABEL: {{^}}test_div_fmas_f32_i1_phi_vcc: -; SI: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}} -; SI: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc -; SI: buffer_load_dword [[LOAD:v[0-9]+]] -; SI: v_cmp_ne_u32_e32 vcc, 0, [[LOAD]] -; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc +; SI: ; %entry +; SI: v_cmp_eq_u32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, {{v[0-9]+}} +; SI: s_mov_b64 vcc, 0 +; SI: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[CMP]] +; SI: ; %bb +; SI: buffer_load_dword [[LOAD:v[0-9]+]], +; SI: v_cmp_ne_u32_e32 vcc, 0, [[LOAD]] +; SI: s_and_b64 vcc, vcc, exec + +; SI: ; %exit +; SI: s_or_b64 exec, exec, [[SAVE]] +; SI-NOT: vcc +; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} +; SI: buffer_store_dword +; SI: s_endpgm -; SI: BB9_2: -; SI: s_or_b64 exec, exec, [[SAVE]] -; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} -; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} -; SI: buffer_store_dword -; SI: s_endpgm define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 addrspace(1)* %dummy) nounwind { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll index 576950188d3..f37b3a3637a 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_break.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll @@ -22,23 +22,28 @@ ; OPT: bb9: ; OPT: call void @llvm.amdgcn.end.cf(i64 -; TODO: Can remove exec fixes in return block ; GCN-LABEL: {{^}}break_loop: -; GCN: s_mov_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: s_mov_b64 [[OUTER_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}} ; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1 -; GCN: v_cmp_lt_i32_e32 vcc, -1 -; GCN: s_and_b64 vcc, exec, vcc -; GCN: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]] - -; GCN: ; %bb.2: ; %bb4 -; GCN: buffer_load_dword -; 
GCN: v_cmp_ge_i32_e32 vcc, - -; GCN: [[FLOW]]: -; GCN: s_or_b64 [[MASK]], vcc, [[MASK]] -; GCN: s_andn2_b64 exec, exec, [[MASK]] -; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]] +; GCN: v_cmp_lt_i32_e32 vcc, -1 +; GCN: s_and_b64 vcc, exec, vcc +; GCN: s_or_b64 [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec +; GCN: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]] + +; GCN: ; %bb4 +; GCN: buffer_load_dword +; GCN: v_cmp_ge_i32_e32 vcc, +; GCN: s_andn2_b64 [[INNER_MASK]], [[INNER_MASK]], exec +; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], vcc, exec +; GCN: s_or_b64 [[INNER_MASK]], [[INNER_MASK]], [[TMP0]] + +; GCN: [[FLOW]]: ; %Flow +; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]] +; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[OUTER_MASK]] +; GCN: s_mov_b64 [[OUTER_MASK]], [[TMP1]] +; GCN: s_andn2_b64 exec, exec, [[TMP1]] +; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]] ; GCN: ; %bb.4: ; %bb9 ; GCN-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll index fbdf9832b29..679fd7c9870 100644 --- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll +++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll @@ -59,31 +59,48 @@ ; GCN-LABEL: {{^}}multi_divergent_region_exit_ret_ret: -; GCN: v_cmp_lt_i32_e32 vcc, 1 -; GCN: s_and_saveexec_b64 -; GCN: s_xor_b64 +; GCN: s_mov_b64 [[EXIT1:s\[[0-9]+:[0-9]+\]]], 0 +; GCN: v_cmp_lt_i32_e32 vcc, 1, +; GCN: s_mov_b64 [[EXIT0:s\[[0-9]+:[0-9]+\]]], 0 +; GCN: s_and_saveexec_b64 +; GCN: s_xor_b64 + +; GCN: ; %LeafBlock1 +; GCN-NEXT: s_mov_b64 [[EXIT0]], exec +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, +; GCN-NEXT: s_and_b64 [[EXIT1]], vcc, exec + +; GCN: ; %Flow +; GCN-NEXT: s_or_saveexec_b64 +; GCN-NEXT: s_xor_b64 ; FIXME: Why is this compare essentially repeated? 
-; GCN: v_cmp_eq_u32_e32 vcc, 1, [[REG:v[0-9]+]] -; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, vcc -; GCN: v_cmp_ne_u32_e32 vcc, 1, [[REG]] -; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, vcc +; GCN: ; %LeafBlock +; GCN-DAG: v_cmp_eq_u32_e32 vcc, 1, +; GCN-DAG: v_cmp_ne_u32_e64 [[TMP1:s\[[0-9]+:[0-9]+\]]], 1, +; GCN-DAG: s_andn2_b64 [[EXIT0]], [[EXIT0]], exec +; GCN-DAG: s_andn2_b64 [[EXIT1]], [[EXIT1]], exec +; GCN-DAG: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], vcc, exec +; GCN-DAG: s_and_b64 [[TMP1]], [[TMP1]], exec +; GCN-DAG: s_or_b64 [[EXIT0]], [[EXIT0]], [[TMP0]] +; GCN-DAG: s_or_b64 [[EXIT1]], [[EXIT1]], [[TMP1]] ; GCN: ; %Flow4 -; GCN-NEXT: s_or_b64 exec, exec -; GCN: v_cmp_ne_u32_e32 vcc, 0 +; GCN-NEXT: s_or_b64 exec, exec, +; GCN-NEXT: s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[EXIT1]] +; GCN-NEXT: s_xor_b64 ; GCN: ; %exit1 -; GCN: ds_write_b32 +; GCN: ds_write_b32 +; GCN: s_andn2_b64 [[EXIT0]], [[EXIT0]], exec -; GCN: %Flow5 -; GCN-NEXT: s_or_b64 exec, exec -; GCN: v_cmp_ne_u32_e32 vcc, 0 -; GCN-NEXT: s_and_saveexec_b64 +; GCN: ; %Flow5 +; GCN-NEXT: s_or_b64 exec, exec, +; GCN-NEXT: s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[EXIT0]] ; GCN: ; %exit0 -; GCN: buffer_store_dword +; GCN: buffer_store_dword ; GCN: ; %UnifiedReturnBlock ; GCN-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll index c4e2f1e3487..4c1a769d599 100644 --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -21,34 +21,46 @@ ; GCN-LABEL: {{^}}multi_else_break: +; GCN: ; %main_body +; GCN: s_mov_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}} + ; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}} +; GCN: s_mov_b64 [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}} ; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}} -; GCN: s_and_saveexec_b64 [[SAVE_BREAK:s\[[0-9]+:[0-9]+\]]], vcc - -; GCN: BB{{[0-9]+}}_{{[0-9]+}}: ; %Flow{{$}} -; GCN-NEXT: ; in Loop: Header=[[INNER_LOOP]] 
Depth=2 - -; Ensure extra or eliminated -; GCN-NEXT: s_or_b64 exec, exec, [[SAVE_BREAK]] -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} -; GCN-NEXT: s_or_b64 [[OR_BREAK:s\[[0-9]+:[0-9]+\]]], vcc, s{{\[[0-9]+:[0-9]+\]}} -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} -; GCN-NEXT: v_mov_b32_e32 -; GCN-NEXT: s_andn2_b64 exec, exec, [[OR_BREAK]] -; GCN-NEXT: s_cbranch_execnz [[INNER_LOOP]] - -; GCN: ; %bb.{{[0-9]+}}: ; %Flow2{{$}} -; GCN-NEXT: ; in Loop: Header=[[OUTER_LOOP]] Depth=1 - -; Ensure copy is eliminated -; GCN-NEXT: s_or_b64 exec, exec, [[OR_BREAK]] -; GCN-NEXT: s_and_b64 [[MASKED2_SAVE_BREAK:s\[[0-9]+:[0-9]+\]]], exec, vcc -; GCN-NEXT: s_or_b64 [[OUTER_OR_BREAK:s\[[0-9]+:[0-9]+\]]], [[MASKED2_SAVE_BREAK]], s{{\[[0-9]+:[0-9]+\]}} -; GCN-NEXT: s_mov_b64 -; GCN-NEXT: v_mov_b32_e32 -; GCN-NEXT: s_andn2_b64 exec, exec, [[OUTER_OR_BREAK]] -; GCN-NEXT: s_cbranch_execnz [[OUTER_LOOP]] +; GCN: s_or_b64 [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]], [[BREAK_OUTER]], exec +; GCN: s_or_b64 [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]], [[BREAK_INNER]], exec +; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc + +; FIXME: duplicate comparison +; GCN: ; %ENDIF +; GCN-DAG: v_cmp_eq_u32_e32 vcc, +; GCN-DAG: v_cmp_ne_u32_e64 [[TMP51NEG:s\[[0-9]+:[0-9]+\]]], +; GCN-DAG: s_andn2_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], exec +; GCN-DAG: s_andn2_b64 [[BREAK_INNER]], [[BREAK_INNER]], exec +; GCN-DAG: s_and_b64 [[TMP_EQ:s\[[0-9]+:[0-9]+\]]], vcc, exec +; GCN-DAG: s_and_b64 [[TMP_NE:s\[[0-9]+:[0-9]+\]]], [[TMP51NEG]], exec +; GCN-DAG: s_or_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], [[TMP_EQ]] +; GCN-DAG: s_or_b64 [[BREAK_INNER]], [[BREAK_INNER]], [[TMP_NE]] + +; GCN: ; %Flow +; GCN: s_or_b64 exec, exec, [[SAVE_EXEC]] +; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER]] +; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]] +; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]] +; GCN: s_andn2_b64 exec, exec, [[TMP0]] +; GCN: s_cbranch_execnz [[INNER_LOOP]] + +; GCN: ; %Flow2 +; GCN: s_or_b64 
exec, exec, [[TMP0]] +; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER]] +; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]] +; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]] +; GCN: s_andn2_b64 exec, exec, [[TMP1]] +; GCN: s_cbranch_execnz [[OUTER_LOOP]] + +; GCN: ; %IF +; GCN-NEXT: s_endpgm define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) { main_body: br label %LOOP.outer @@ -78,12 +90,38 @@ ENDIF: ; preds = %LOOP ; OPT: llvm.amdgcn.end.cf ; GCN-LABEL: {{^}}multi_if_break_loop: -; GCN: s_mov_b64 [[BREAK_REG:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: s_mov_b64 [[LEFT:s\[[0-9]+:[0-9]+\]]], 0{{$}} ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %bb1{{$}} +; GCN: s_mov_b64 [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]], [[LEFT]] + +; GCN: ; %LeafBlock1 +; GCN: s_mov_b64 +; GCN: s_mov_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], -1{{$}} + +; GCN: ; %case1 +; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], +; GCN: v_cmp_ge_i32_e32 vcc, {{v[0-9]+}}, [[LOAD2]] +; GCN: s_orn2_b64 [[BREAK]], vcc, exec + +; GCN: ; %Flow3 +; GCN: s_branch [[FLOW:BB[0-9]+_[0-9]+]] + +; GCN: s_mov_b64 [[BREAK]], -1{{$}} + +; GCN: [[FLOW]]: ; %Flow + +; GCN: ; %case0 +; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], +; GCN-DAG: s_andn2_b64 [[BREAK]], [[BREAK]], exec +; GCN-DAG: v_cmp_ge_i32_e32 vcc, {{v[0-9]+}}, [[LOAD1]] +; GCN-DAG: s_and_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], vcc, exec +; GCN: s_or_b64 [[BREAK]], [[BREAK]], [[TMP]] -; GCN: s_or_b64 [[BREAK_REG]], vcc, [[BREAK_REG]] -; GCN: s_andn2_b64 exec, exec, [[BREAK_REG]] +; GCN: ; %Flow4 +; GCN: s_and_b64 [[BREAK]], exec, [[BREAK]] +; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT]] +; GCN: s_andn2_b64 exec, exec, [[LEFT]] ; GCN-NEXT: s_cbranch_execnz define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/select-opt.ll b/llvm/test/CodeGen/AMDGPU/select-opt.ll index 33028f17531..f773357976c 100644 --- a/llvm/test/CodeGen/AMDGPU/select-opt.ll +++ b/llvm/test/CodeGen/AMDGPU/select-opt.ll @@ -137,7 +137,6 @@ define 
amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, flo ; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0 ; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}} ; GCN: v_cmp_eq_f32_e32 vcc, 0, v{{[0-9]+}} -; GCN: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 { entry: diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll index 04df33b8dd4..3db6fd2d898 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll @@ -100,22 +100,22 @@ endif: ret void } -; FIXME: Should write to different SGPR pairs instead of copying to -; VALU for i1 phi. - ; SI-LABEL: {{^}}sgpr_if_else_valu_cmp_phi_br: -; SI: buffer_load_dword [[AVAL:v[0-9]+]] -; SI: v_cmp_gt_i32_e32 [[CMP_IF:vcc]], 0, [[AVAL]] -; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]] - -; SI: BB{{[0-9]+}}_2: -; SI: buffer_load_dword [[AVAL:v[0-9]+]] -; SI: v_cmp_eq_u32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]] -; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]] - -; SI: v_cmp_ne_u32_e32 [[CMP_CMP:vcc]], 0, [[V_CMP]] -; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP_CMP]] -; SI: buffer_store_dword [[RESULT]] + +; SI: ; %else +; SI: buffer_load_dword [[AVAL:v[0-9]+]] +; SI: v_cmp_gt_i32_e64 [[PHI:s\[[0-9]+:[0-9]+\]]], 0, [[AVAL]] + +; SI: ; %if +; SI: buffer_load_dword [[AVAL:v[0-9]+]] +; SI: v_cmp_eq_u32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]] +; SI-DAG: s_andn2_b64 [[PHI]], [[PHI]], exec +; SI-DAG: s_and_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP_ELSE]], exec +; SI: s_or_b64 [[PHI]], [[PHI]], [[TMP]] + +; SI: ; %endif +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[PHI]] +; SI: buffer_store_dword [[RESULT]], define amdgpu_kernel void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 diff --git 
a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll index 73e56593ce8..6215a486a36 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -asm-verbose=0 -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -asm-verbose=0 -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s ; FUNC-LABEL: {{^}}break_inserted_outside_of_loop: @@ -27,18 +27,23 @@ ENDIF: ; FUNC-LABEL: {{^}}phi_cond_outside_loop: -; FIXME: This could be folded into the s_or_b64 instruction -; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0 -; SI: [[LOOP_LABEL:[A-Z0-9]+]] -; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} -; SI_IF_BREAK instruction: -; SI: s_or_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], vcc, [[ZERO]] +; SI: s_mov_b64 [[LEFT:s\[[0-9]+:[0-9]+\]]], 0 +; SI: s_mov_b64 [[PHI:s\[[0-9]+:[0-9]+\]]], 0 -; SI_LOOP instruction: -; SI: s_andn2_b64 exec, exec, [[BREAK]] -; SI: s_cbranch_execnz [[LOOP_LABEL]] -; SI: s_endpgm +; SI: ; %else +; SI: v_cmp_eq_u32_e64 [[TMP:s\[[0-9]+:[0-9]+\]]], +; SI: s_and_b64 [[PHI]], [[TMP]], exec + +; SI: ; %endif + +; SI: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]: ; %loop +; SI: s_mov_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[LEFT]] +; SI: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[PHI]] +; SI: s_or_b64 [[LEFT]], [[TMP1]], [[TMP]] +; SI: s_andn2_b64 exec, exec, [[LEFT]] +; SI: s_cbranch_execnz [[LOOP_LABEL]] +; SI: s_endpgm define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) { entry: @@ -90,19 +95,21 @@ declare float @llvm.fabs.f32(float) nounwind readnone ; This broke the old AMDIL cfg 
structurizer ; FUNC-LABEL: {{^}}loop_land_info_assert: ; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}} -; SI: s_and_b64 vcc, exec, [[CMP4]] -; SI-NEXT: s_cbranch_vccnz [[BR1:BB[0-9_]+]] -; SI-NEXT: s_branch [[BR2:BB[0-9_]+]] -; SI-NEXT: BB{{[0-9_]+}}: -; SI-NEXT: buffer_store_dword +; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]] +; SI: s_mov_b64 vcc, [[CMP4M]] +; SI-NEXT: s_cbranch_vccnz [[CONVEX_EXIT:BB[0-9_]+]] +; SI-NEXT: s_branch [[FOR_COND_PREHDR:BB[0-9_]+]] + +; SI: ; %if.else +; SI: buffer_store_dword ; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]: -; SI: [[BR1]]: -; SI-NEXT: s_and_b64 vcc, exec, -; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]] +; SI: [[CONVEX_EXIT]]: +; SI: s_mov_b64 vcc, +; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]] ; SI: s_branch [[INFLOOP]] -; SI-NEXT: [[BR2]]: +; SI-NEXT: [[FOR_COND_PREHDR]]: ; SI: s_cbranch_vccz [[ENDPGM]] ; SI: [[ENDPGM]]: diff --git a/llvm/test/CodeGen/AMDGPU/sub_i1.ll b/llvm/test/CodeGen/AMDGPU/sub_i1.ll index 70562a59f0a..6861d32dccf 100644 --- a/llvm/test/CodeGen/AMDGPU/sub_i1.ll +++ b/llvm/test/CodeGen/AMDGPU/sub_i1.ll @@ -21,8 +21,8 @@ define amdgpu_kernel void @sub_var_imm_i1(i1 addrspace(1)* %out, i1 addrspace(1) } ; GCN-LABEL: {{^}}sub_i1_cf: -; GCN: v_cmp_ne_u32_e32 vcc, 0, {{v[0-9]+}} -; GCN-NEXT: s_not_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc +; GCN: ; %endif +; GCN: s_not_b64 define amdgpu_kernel void @sub_i1_cf(i1 addrspace(1)* %out, i1 addrspace(1)* %a, i1 addrspace(1)* %b) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll index 3d980b749a9..ca85f0bee4c 100644 --- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll @@ -8,23 +8,22 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; waitcnt should be inserted after exec modification -; SI: v_cmp_lt_i32_e32 vcc, 0, -; SI: v_mov_b32_e32 {{v[0-9]+}}, 0 +; SI: v_cmp_lt_i32_e32 vcc, 0, +; SI-NEXT: s_mov_b64 
{{s\[[0-9]+:[0-9]+\]}}, 0 +; SI-NEXT: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0 ; SI-NEXT: s_and_saveexec_b64 [[SAVE1:s\[[0-9]+:[0-9]+\]]], vcc ; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE1]] ; SI-NEXT: ; mask branch [[FLOW_BB:BB[0-9]+_[0-9]+]] ; SI-NEXT: s_cbranch_execz [[FLOW_BB]] ; SI-NEXT: BB{{[0-9]+}}_1: ; %LeafBlock3 -; SI-NOT: s_mov_b64 s[{{[0-9]:[0-9]}}], -1 -; SI: v_mov_b32_e32 v{{[0-9]}}, -1 -; SI: s_and_saveexec_b64 +; SI: s_mov_b64 s[{{[0-9]:[0-9]}}], -1 +; SI: s_and_saveexec_b64 ; SI-NEXT: ; mask branch ; v_mov should be after exec modification ; SI: [[FLOW_BB]]: ; SI-NEXT: s_or_saveexec_b64 [[SAVE3:s\[[0-9]+:[0-9]+\]]], [[SAVE2]] -; SI-NEXT: v_mov_b32_e32 v{{[0-9]+}} ; SI-NEXT: s_xor_b64 exec, exec, [[SAVE3]] ; SI-NEXT: ; mask branch ; @@ -220,9 +219,10 @@ exit: ; SI: [[LABEL_FLOW]]: ; SI-NEXT: ; in Loop: Header=[[LABEL_LOOP]] ; SI-NEXT: s_or_b64 exec, exec, [[ORNEG2]] -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; SI-NEXT: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]] -; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]] +; SI-NEXT: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], +; SI-NEXT: s_or_b64 [[TMP2:s\[[0-9]+:[0-9]+\]]], [[TMP1]], [[COND_STATE]] +; SI-NEXT: s_mov_b64 [[COND_STATE]], [[TMP2]] +; SI-NEXT: s_andn2_b64 exec, exec, [[TMP2]] ; SI-NEXT: s_cbranch_execnz [[LABEL_LOOP]] ; SI: [[LABEL_EXIT]]: diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll index a941e5fb1f7..08267b76aef 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll @@ -6,7 +6,7 @@ ; GCN-LABEL: {{^}}testKernel ; GCN: BB0_1: ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cmp_eq_f32_e64 +; GCN-NEXT: v_cmp_eq_f32_e32 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_f32_e32 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) |

