diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/collapse-endcf.ll | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll | 48 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll | 21 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/multilevel-break.ll | 56 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/wave32.ll | 36 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/wqm.ll | 11 |
11 files changed, 118 insertions, 111 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index a425bcf2bdf..6a8456d99bc 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -230,6 +230,11 @@ bb.end: ; preds = %bb.then, %bb ; Make sure scc liveness is updated if sor_b64 is removed ; ALL-LABEL: {{^}}scc_liveness: +; GCN: %bb10 +; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} +; GCN: s_andn2_b64 +; GCN-NEXT: s_cbranch_execz + ; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]: ; GCN: s_andn2_b64 exec, exec, ; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]] @@ -240,10 +245,6 @@ bb.end: ; preds = %bb.then, %bb ; GCN-NOT: s_or_b64 exec, exec ; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} -; GCN: s_andn2_b64 -; GCN-NEXT: s_cbranch_execnz - -; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} ; GCN: buffer_store_dword ; GCN: buffer_store_dword ; GCN: buffer_store_dword diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll index 2060ea42f16..c903a04039a 100644 --- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -20,38 +20,41 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: ; implicit-def: $sgpr8_sgpr9 ; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7 ; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3 -; CHECK-NEXT: BB0_1: ; %loop +; CHECK-NEXT: s_branch BB0_3 +; CHECK-NEXT: BB0_1: ; %Flow1 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_mov_b64 s[8:9], 0 +; CHECK-NEXT: BB0_2: ; %Flow +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7] +; CHECK-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5] +; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec +; CHECK-NEXT: s_and_b64 s[4:5], s[8:9], exec +; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] +; CHECK-NEXT: s_mov_b64 s[4:5], s[10:11] +; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11] +; CHECK-NEXT: s_cbranch_execz BB0_6 +; CHECK-NEXT: BB0_3: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1 ; CHECK-NEXT: s_and_b64 vcc, exec, vcc ; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec ; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], exec -; CHECK-NEXT: s_cbranch_vccz BB0_5 -; CHECK-NEXT: ; %bb.2: ; %endif1 -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: s_cbranch_vccz BB0_2 +; CHECK-NEXT: ; %bb.4: ; %endif1 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: s_mov_b64 s[6:7], -1 ; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[0:1] ; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; CHECK-NEXT: ; mask branch BB0_4 -; CHECK-NEXT: BB0_3: ; %endif2 -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: ; mask branch BB0_1 +; CHECK-NEXT: s_cbranch_execz BB0_1 +; CHECK-NEXT: BB0_5: ; %endif2 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: v_add_u32_e32 v1, 1, v1 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1 -; CHECK-NEXT: BB0_4: ; %Flow1 -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] -; CHECK-NEXT: s_mov_b64 s[8:9], 0 -; CHECK-NEXT: BB0_5: ; %Flow -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7] -; CHECK-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5] -; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; CHECK-NEXT: s_and_b64 s[4:5], s[8:9], exec -; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] -; CHECK-NEXT: s_mov_b64 s[4:5], s[10:11] -; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11] -; CHECK-NEXT: s_cbranch_execnz BB0_1 -; CHECK-NEXT: ; %bb.6: ; %Flow2 +; CHECK-NEXT: s_branch BB0_1 +; CHECK-NEXT: BB0_6: ; %Flow2 ; CHECK-NEXT: s_or_b64 exec, exec, s[10:11] ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3] @@ -62,6 +65,7 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm ; CHECK-NEXT: s_endpgm +; this is the divergent branch with the condition not marked as divergent start: %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0) br label %loop diff --git a/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll b/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll index be6e3fd05ae..1a675ce57bc 100644 --- a/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll +++ b/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll @@ -1,27 +1,28 @@ ; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s -; CHECK-LABEL: %bb11 +; CHECK-LABEL: %bb22 -; Load from %arg in a Loop body has alias store +; Load from %arg has alias store in Loop ; CHECK: flat_load_dword -; CHECK-LABEL: %bb20 -; CHECK: flat_store_dword +; ##################################################################### + +; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i] + +; CHECK: s_load_dword ; ##################################################################### -; CHECK-LABEL: %bb22 +; CHECK-LABEL: %bb11 -; Load from %arg has alias store in Loop +; Load from %arg in a Loop body has alias store ; CHECK: flat_load_dword -; ##################################################################### - -; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i] +; CHECK-LABEL: %bb20 -; CHECK: s_load_dword +; CHECK: flat_store_dword define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 { bb: diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll index ae78a1ecf32..a050bfe29bf 100644 --- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll @@ -3,20 +3,20 @@ ; SI-LABEL: {{^}}i1_copy_from_loop: ; +; SI: ; %Flow +; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec +; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], exec +; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]] + ; SI: ; %for.body ; SI: v_cmp_gt_u32_e64 [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4, -; SI-DAG: s_andn2_b64 [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec +; SI-DAG: s_andn2_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec ; SI-DAG: s_and_b64 [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec ; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]] ; SI: ; %Flow1 ; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec -; SI: ; %Flow -; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec -; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec -; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]] - ; SI: ; %for.end ; SI: s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[LCSSA_ACCUM]] diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index dc7f495c42f..bde1cd5c435 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -630,12 +630,7 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace( ; GCN-LABEL: {{^}}broken_phi_bb: ; GCN: v_mov_b32_e32 [[PHIREG:v[0-9]+]], 8 -; GCN: s_branch [[BB2:BB[0-9]+_[0-9]+]] - -; GCN: {{^BB[0-9]+_[0-9]+}}: -; GCN: s_mov_b64 exec, - -; GCN: [[BB2]]: +; GCN: [[BB2:BB[0-9]+_[0-9]+]]: ; GCN: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]] ; GCN: buffer_load_dword @@ -647,6 +642,11 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace( ; IDXMODE: s_set_gpr_idx_off ; GCN: s_cbranch_execnz [[REGLOOP]] + +; GCN: {{^; %bb.[0-9]}}: +; GCN: s_mov_b64 exec, +; GCN: s_branch [[BB2]] + define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) #0 { bb: br label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll index 486364acdb7..f374276aa60 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll @@ -61,9 +61,9 @@ loopexit: ; GCN-LABEL: {{^}}break_cond_is_arg: ; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}} +; GCN: s_andn2_b64 exec, exec, [[REG3:[^ ,]*]] ; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]] -; GCN: s_or_b64 [[REG3:[^ ,]*]], [[REG2]], -; GCN: s_andn2_b64 exec, exec, [[REG3]] +; GCN: s_or_b64 [[REG3]], [[REG2]], define void @break_cond_is_arg(i32 %arg, i1 %breakcond) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll index d243233119b..0ae28c6ef79 100644 --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -24,13 +24,29 @@ ; GCN: ; %main_body ; GCN: s_mov_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2 +; GCN: s_or_b64 exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]] +; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]] +; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]] +; GCN: s_andn2_b64 exec, exec, [[TMP1]] +; GCN: s_cbranch_execz [[IF_BLOCK:BB[0-9]+_[0-9]+]] + ; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}} ; GCN: s_mov_b64 [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: ; %Flow +; GCN: s_or_b64 exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]] +; GCN: s_and_b64 [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]] +; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]] +; GCN: s_andn2_b64 exec, exec, [[TMP0]] +; GCN: s_cbranch_execz [[FLOW2]] + ; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}} -; GCN: s_or_b64 [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]], [[BREAK_OUTER]], exec -; GCN: s_or_b64 [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]], [[BREAK_INNER]], exec -; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc +; GCN: s_or_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], exec +; GCN: s_or_b64 [[BREAK_INNER]], [[BREAK_INNER]], exec +; GCN: s_and_saveexec_b64 [[SAVE_EXEC]], vcc ; FIXME: duplicate comparison ; GCN: ; %ENDIF @@ -43,23 +59,7 @@ ; GCN-DAG: s_or_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], [[TMP_EQ]] ; GCN-DAG: s_or_b64 [[BREAK_INNER]], [[BREAK_INNER]], [[TMP_NE]] -; GCN: ; %Flow -; GCN: s_or_b64 exec, exec, [[SAVE_EXEC]] -; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER]] -; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]] -; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]] -; GCN: s_andn2_b64 exec, exec, [[TMP0]] -; GCN: s_cbranch_execnz [[INNER_LOOP]] - -; GCN: ; %Flow2 -; GCN: s_or_b64 exec, exec, [[TMP0]] -; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER]] -; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]] -; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]] -; GCN: s_andn2_b64 exec, exec, [[TMP1]] -; GCN: s_cbranch_execnz [[OUTER_LOOP]] - -; GCN: ; %IF +; GCN: [[IF_BLOCK]]: ; %IF ; GCN-NEXT: s_endpgm define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) { main_body: @@ -92,12 +92,18 @@ ENDIF: ; preds = %LOOP ; GCN-LABEL: {{^}}multi_if_break_loop: ; GCN: s_mov_b64 [[LEFT:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: ; %Flow4 +; GCN: s_and_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK]] +; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]] +; GCN: s_andn2_b64 exec, exec, [[LEFT]] +; GCN-NEXT: s_cbranch_execz + ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %bb1{{$}} -; GCN: s_mov_b64 [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]], [[LEFT]] +; GCN: s_mov_b64 [[OLD_LEFT]], [[LEFT]] ; GCN: ; %LeafBlock1 ; GCN: s_mov_b64 -; GCN: s_mov_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN: s_mov_b64 [[BREAK]], -1{{$}} ; GCN: ; %case1 ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], @@ -118,12 +124,6 @@ ENDIF: ; preds = %LOOP ; GCN-DAG: s_and_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], vcc, exec ; GCN: s_or_b64 [[BREAK]], [[BREAK]], [[TMP]] -; GCN: ; %Flow4 -; GCN: s_and_b64 [[BREAK]], exec, [[BREAK]] -; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT]] -; GCN: s_andn2_b64 exec, exec, [[LEFT]] -; GCN-NEXT: s_cbranch_execnz - define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 { bb: %id = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll index be5d8d47205..2be99267c4e 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll +++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll @@ -3,11 +3,11 @@ ; GCN-LABEL: {{^}}negated_cond: ; GCN: BB0_1: ; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]], -; GCN: BB0_2: +; GCN: BB0_3: ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp ; GCN: s_andn2_b64 vcc, exec, [[CC]] -; GCN: s_cbranch_vccnz BB0_4 +; GCN: s_cbranch_vccnz BB0_2 define amdgpu_kernel void @negated_cond(i32 addrspace(1)* %arg1) { bb: br label %bb1 @@ -36,11 +36,11 @@ bb4: ; GCN-LABEL: {{^}}negated_cond_dominated_blocks: ; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]], -; GCN: BB1_1: +; GCN: %bb4 ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp ; GCN: s_andn2_b64 vcc, exec, [[CC]] -; GCN: s_cbranch_vccz BB1_3 +; GCN: s_cbranch_vccnz BB1_1 define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) { bb: br label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll index ad68d300de0..14928749e9f 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -96,20 +96,20 @@ declare float @llvm.fabs.f32(float) nounwind readnone ; FUNC-LABEL: {{^}}loop_land_info_assert: ; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}} ; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]] -; SI: s_branch [[INFLOOP:BB[0-9]+_[0-9]+]] + +; SI: [[WHILELOOP:BB[0-9]+_[0-9]+]]: ; %while.cond +; SI: s_cbranch_vccz [[FOR_COND_PH:BB[0-9]+_[0-9]+]] ; SI: [[CONVEX_EXIT:BB[0-9_]+]] ; SI: s_mov_b64 vcc, ; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]] -; SI: s_cbranch_vccnz [[INFLOOP]] + +; SI: s_cbranch_vccnz [[WHILELOOP]] ; SI: ; %if.else ; SI: buffer_store_dword -; SI: [[INFLOOP]]: -; SI: s_cbranch_vccnz [[CONVEX_EXIT]] - -; SI: ; %for.cond.preheader +; SI: [[FOR_COND_PH]]: ; %for.cond.preheader ; SI: s_cbranch_vccz [[ENDPGM]] ; SI: [[ENDPGM]]: diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index e1da3cf8f46..a71ca5db765 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -166,30 +166,29 @@ endif: } ; GCN-LABEL: {{^}}test_loop_with_if: -; GCN: BB{{.*}}: ; %bb2 +; GFX1032: s_or_b32 s{{[0-9]+}}, vcc_lo, s{{[0-9]+}} +; GFX1032: s_andn2_b32 exec_lo, exec_lo, s{{[0-9]+}} +; GFX1064: s_or_b64 s[{{[0-9:]+}}], vcc, s[{{[0-9:]+}}] +; GFX1064: s_andn2_b64 exec, exec, s[{{[0-9:]+}}] +; GCN: s_cbranch_execz +; GCN: BB{{.*}}: ; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo ; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}} ; GCN: s_cbranch_execz -; GCN: BB{{.*}}: ; %bb5 -; GCN: BB{{.*}}: ; %Flow +; GCN: BB{{.*}}: +; GCN: BB{{.*}}: ; GFX1032: s_xor_b32 s{{[0-9]+}}, exec_lo, s{{[0-9]+}} ; GFX1064: s_xor_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}] ; GCN: ; mask branch BB -; GCN: BB{{.*}}: ; %bb11 -; GCN: BB{{.*}}: ; %Flow1 +; GCN: BB{{.*}}: +; GCN: BB{{.*}}: ; GFX1032: s_or_b32 exec_lo, exec_lo, s{{[0-9]+}} ; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, s{{[0-9]+}} ; GFX1064: s_or_b64 exec, exec, s[{{[0-9:]+}}] ; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} ; GCN: ; mask branch BB -; GCN: BB{{.*}}: ; %bb10 -; GCN: BB{{.*}}: ; %bb13 -; GFX1032: s_or_b32 s{{[0-9]+}}, vcc_lo, s{{[0-9]+}} -; GFX1032: s_andn2_b32 exec_lo, exec_lo, s{{[0-9]+}} -; GFX1064: s_or_b64 s[{{[0-9:]+}}], vcc, s[{{[0-9:]+}}] -; GFX1064: s_andn2_b64 exec, exec, s[{{[0-9:]+}}] -; GCN: s_cbranch_execnz -; GCN: ; %bb1 +; GCN: BB{{.*}}: +; GCN: BB{{.*}}: ; GCN: s_endpgm define amdgpu_kernel void @test_loop_with_if(i32 addrspace(1)* %arg) #0 { bb: @@ -231,17 +230,16 @@ bb13: ; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}} ; GCN: ; mask branch ; GCN: s_cbranch_execz -; GCN: BB{{.*}}: ; %.preheader -; GCN: ; %bb8 +; GCN: BB{{.*}}: +; GCN: BB{{.*}}: ; GFX1032: s_andn2_b32 s{{[0-9]+}}, s{{[0-9]+}}, exec_lo ; GFX1064: s_andn2_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], exec ; GFX1032: s_or_b32 s{{[0-9]+}}, vcc_lo, s{{[0-9]+}} ; GFX1032: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GFX1064: s_or_b64 s[{{[0-9:]+}}], vcc, s[{{[0-9:]+}}] ; GFX1064: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] -; GCN: BB{{.*}}: ; %Flow -; GCN: s_cbranch_execnz -; GCN: BB{{.*}}: ; %.loopexit +; GCN: s_cbranch_execz +; GCN: BB{{.*}}: define amdgpu_kernel void @test_loop_with_if_else_break(i32 addrspace(1)* %arg) #0 { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -657,7 +655,7 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d ; GCN-LABEL: {{^}}test_loop_vcc: ; GFX1032: v_cmp_lt_f32_e32 vcc_lo, ; GFX1064: v_cmp_lt_f32_e32 vcc, -; GCN: s_cbranch_vccz +; GCN: s_cbranch_vccnz define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) #0 { entry: br label %loop diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index 41ff30b4fdd..b827668950b 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -650,12 +650,15 @@ main_body: ; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0 ; CHECK-DAG: s_mov_b32 [[SEVEN:s[0-9]+]], 0x40e00000 -; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body -; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]] +; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %loop ; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]] -; CHECK: s_cbranch_vccz [[LOOPHDR]] -; CHECK: ; %break +; CHECK: s_cbranch_vccnz +; CHECK: ; %body +; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]] +; CHECK: s_branch [[LOOPHDR]] + +; CHECK: ; %break ; CHECK: ; return define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind { entry: |