diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/collapse-endcf.ll | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll | 41 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll | 21 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/hoist-cond.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/loop_break.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/madmk.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/multilevel-break.ll | 56 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/valu-i1.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/wqm.ll | 11 |
14 files changed, 100 insertions, 94 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index a425bcf2bdf..6a8456d99bc 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -230,6 +230,11 @@ bb.end: ; preds = %bb.then, %bb ; Make sure scc liveness is updated if sor_b64 is removed ; ALL-LABEL: {{^}}scc_liveness: +; GCN: %bb10 +; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} +; GCN: s_andn2_b64 +; GCN-NEXT: s_cbranch_execz + ; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]: ; GCN: s_andn2_b64 exec, exec, ; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]] @@ -240,10 +245,6 @@ bb.end: ; preds = %bb.then, %bb ; GCN-NOT: s_or_b64 exec, exec ; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} -; GCN: s_andn2_b64 -; GCN-NEXT: s_cbranch_execnz - -; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} ; GCN: buffer_store_dword ; GCN: buffer_store_dword ; GCN: buffer_store_dword diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll index 8d21050ebee..01cd11a32d8 100644 --- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -19,38 +19,39 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3 ; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7 -; CHECK-NEXT: BB0_1: ; %loop +; CHECK-NEXT: s_branch BB0_3 +; CHECK-NEXT: BB0_1: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: ; implicit-def: $vgpr1 +; CHECK-NEXT: BB0_2: ; %Flow +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7] +; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5] +; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9] +; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_cbranch_execz BB0_7 +; CHECK-NEXT: BB0_3: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1 ; CHECK-NEXT: s_and_b64 vcc, exec, vcc ; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec ; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], exec -; CHECK-NEXT: s_cbranch_vccz BB0_5 -; CHECK-NEXT: ; %bb.2: ; %endif1 -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: s_cbranch_vccz BB0_1 +; CHECK-NEXT: ; %bb.4: ; %endif1 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: s_mov_b64 s[6:7], -1 ; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[0:1] ; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; CHECK-NEXT: ; mask branch BB0_4 -; CHECK-NEXT: BB0_3: ; %endif2 -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: ; mask branch BB0_6 +; CHECK-NEXT: BB0_5: ; %endif2 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: v_add_u32_e32 v1, 1, v1 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1 -; CHECK-NEXT: BB0_4: ; %Flow1 -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: BB0_6: ; %Flow1 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] ; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; CHECK-NEXT: s_branch BB0_6 -; CHECK-NEXT: BB0_5: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: ; implicit-def: $vgpr1 -; CHECK-NEXT: BB0_6: ; %Flow -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7] -; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5] -; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9] -; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9] -; CHECK-NEXT: s_cbranch_execnz BB0_1 -; CHECK-NEXT: ; %bb.7: ; %Flow2 +; CHECK-NEXT: s_branch BB0_2 +; CHECK-NEXT: BB0_7: ; %Flow2 ; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; this is the divergent branch with the condition not marked as divergent diff --git a/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll b/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll index be6e3fd05ae..1a675ce57bc 100644 --- a/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll +++ b/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll @@ -1,27 +1,28 @@ ; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s -; CHECK-LABEL: %bb11 +; CHECK-LABEL: %bb22 -; Load from %arg in a Loop body has alias store +; Load from %arg has alias store in Loop ; CHECK: flat_load_dword -; CHECK-LABEL: %bb20 -; CHECK: flat_store_dword +; ##################################################################### + +; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i] + +; CHECK: s_load_dword ; ##################################################################### -; CHECK-LABEL: %bb22 +; CHECK-LABEL: %bb11 -; Load from %arg has alias store in Loop +; Load from %arg in a Loop body has alias store ; CHECK: flat_load_dword -; ##################################################################### - -; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i] +; CHECK-LABEL: %bb20 -; CHECK: s_load_dword +; CHECK: flat_store_dword define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 { bb: diff --git a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll index 6fe53e718f6..76a26882987 100644 --- a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll +++ b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck %s ; Check that invariant compare is hoisted out of the loop. ; At the same time condition shall not be serialized into a VGPR and deserialized later diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll index ae78a1ecf32..a050bfe29bf 100644 --- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll @@ -3,20 +3,20 @@ ; SI-LABEL: {{^}}i1_copy_from_loop: ; +; SI: ; %Flow +; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec +; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], exec +; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]] + ; SI: ; %for.body ; SI: v_cmp_gt_u32_e64 [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4, -; SI-DAG: s_andn2_b64 [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec +; SI-DAG: s_andn2_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec ; SI-DAG: s_and_b64 [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec ; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]] ; SI: ; %Flow1 ; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec -; SI: ; %Flow -; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec -; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec -; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]] - ; SI: ; %for.end ; SI: s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[LCSSA_ACCUM]] diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index dc7f495c42f..bde1cd5c435 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -630,12 +630,7 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace( ; GCN-LABEL: {{^}}broken_phi_bb: ; GCN: v_mov_b32_e32 [[PHIREG:v[0-9]+]], 8 -; GCN: s_branch [[BB2:BB[0-9]+_[0-9]+]] - -; GCN: {{^BB[0-9]+_[0-9]+}}: -; GCN: s_mov_b64 exec, - -; GCN: [[BB2]]: +; GCN: [[BB2:BB[0-9]+_[0-9]+]]: ; GCN: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]] ; GCN: buffer_load_dword @@ -647,6 +642,11 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace( ; IDXMODE: s_set_gpr_idx_off ; GCN: s_cbranch_execnz [[REGLOOP]] + +; GCN: {{^; %bb.[0-9]}}: +; GCN: s_mov_b64 exec, +; GCN: s_branch [[BB2]] + define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) #0 { bb: br label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll index b4436fc42fa..fd634689fcd 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_break.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll @@ -1,5 +1,5 @@ ; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s ; Uses llvm.amdgcn.break diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll index 486364acdb7..f374276aa60 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll @@ -61,9 +61,9 @@ loopexit: ; GCN-LABEL: {{^}}break_cond_is_arg: ; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}} +; GCN: s_andn2_b64 exec, exec, [[REG3:[^ ,]*]] ; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]] -; GCN: s_or_b64 [[REG3:[^ ,]*]], [[REG2]], -; GCN: s_andn2_b64 exec, exec, [[REG3]] +; GCN: s_or_b64 [[REG3]], [[REG2]], define void @break_cond_is_arg(i32 %arg, i1 %breakcond) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/madmk.ll b/llvm/test/CodeGen/AMDGPU/madmk.ll index e06c5e8f485..28df8c8f3db 100644 --- a/llvm/test/CodeGen/AMDGPU/madmk.ll +++ b/llvm/test/CodeGen/AMDGPU/madmk.ll @@ -188,9 +188,9 @@ define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias } ; SI-LABEL: {{^}}kill_madmk_verifier_error: +; SI: s_or_b64 ; SI: s_xor_b64 ; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}} -; SI: s_or_b64 define amdgpu_kernel void @kill_madmk_verifier_error() nounwind { bb: br label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll index d243233119b..0ae28c6ef79 100644 --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -24,13 +24,29 @@ ; GCN: ; %main_body ; GCN: s_mov_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2 +; GCN: s_or_b64 exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]] +; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]] +; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]] +; GCN: s_andn2_b64 exec, exec, [[TMP1]] +; GCN: s_cbranch_execz [[IF_BLOCK:BB[0-9]+_[0-9]+]] + ; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}} ; GCN: s_mov_b64 [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: ; %Flow +; GCN: s_or_b64 exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]] +; GCN: s_and_b64 [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]] +; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]] +; GCN: s_andn2_b64 exec, exec, [[TMP0]] +; GCN: s_cbranch_execz [[FLOW2]] + ; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}} -; GCN: s_or_b64 [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]], [[BREAK_OUTER]], exec -; GCN: s_or_b64 [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]], [[BREAK_INNER]], exec -; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc +; GCN: s_or_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], exec +; GCN: s_or_b64 [[BREAK_INNER]], [[BREAK_INNER]], exec +; GCN: s_and_saveexec_b64 [[SAVE_EXEC]], vcc ; FIXME: duplicate comparison ; GCN: ; %ENDIF @@ -43,23 +59,7 @@ ; GCN-DAG: s_or_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], [[TMP_EQ]] ; GCN-DAG: s_or_b64 [[BREAK_INNER]], [[BREAK_INNER]], [[TMP_NE]] -; GCN: ; %Flow -; GCN: s_or_b64 exec, exec, [[SAVE_EXEC]] -; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER]] -; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]] -; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]] -; GCN: s_andn2_b64 exec, exec, [[TMP0]] -; GCN: s_cbranch_execnz [[INNER_LOOP]] - -; GCN: ; %Flow2 -; GCN: s_or_b64 exec, exec, [[TMP0]] -; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER]] -; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]] -; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]] -; GCN: s_andn2_b64 exec, exec, [[TMP1]] -; GCN: s_cbranch_execnz [[OUTER_LOOP]] - -; GCN: ; %IF +; GCN: [[IF_BLOCK]]: ; %IF ; GCN-NEXT: s_endpgm define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) { main_body: @@ -92,12 +92,18 @@ ENDIF: ; preds = %LOOP ; GCN-LABEL: {{^}}multi_if_break_loop: ; GCN: s_mov_b64 [[LEFT:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: ; %Flow4 +; GCN: s_and_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK]] +; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]] +; GCN: s_andn2_b64 exec, exec, [[LEFT]] +; GCN-NEXT: s_cbranch_execz + ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %bb1{{$}} -; GCN: s_mov_b64 [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]], [[LEFT]] +; GCN: s_mov_b64 [[OLD_LEFT]], [[LEFT]] ; GCN: ; %LeafBlock1 ; GCN: s_mov_b64 -; GCN: s_mov_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN: s_mov_b64 [[BREAK]], -1{{$}} ; GCN: ; %case1 ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], @@ -118,12 +124,6 @@ ENDIF: ; preds = %LOOP ; GCN-DAG: s_and_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], vcc, exec ; GCN: s_or_b64 [[BREAK]], [[BREAK]], [[TMP]] -; GCN: ; %Flow4 -; GCN: s_and_b64 [[BREAK]], exec, [[BREAK]] -; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT]] -; GCN: s_andn2_b64 exec, exec, [[LEFT]] -; GCN-NEXT: s_cbranch_execnz - define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 { bb: %id = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll index be5d8d47205..2be99267c4e 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll +++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll @@ -3,11 +3,11 @@ ; GCN-LABEL: {{^}}negated_cond: ; GCN: BB0_1: ; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]], -; GCN: BB0_2: +; GCN: BB0_3: ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp ; GCN: s_andn2_b64 vcc, exec, [[CC]] -; GCN: s_cbranch_vccnz BB0_4 +; GCN: s_cbranch_vccnz BB0_2 define amdgpu_kernel void @negated_cond(i32 addrspace(1)* %arg1) { bb: br label %bb1 @@ -36,11 +36,11 @@ bb4: ; GCN-LABEL: {{^}}negated_cond_dominated_blocks: ; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]], -; GCN: BB1_1: +; GCN: %bb4 ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp ; GCN: s_andn2_b64 vcc, exec, [[CC]] -; GCN: s_cbranch_vccz BB1_3 +; GCN: s_cbranch_vccnz BB1_1 define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) { bb: br label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll index ad68d300de0..14928749e9f 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -96,20 +96,20 @@ declare float @llvm.fabs.f32(float) nounwind readnone ; FUNC-LABEL: {{^}}loop_land_info_assert: ; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}} ; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]] -; SI: s_branch [[INFLOOP:BB[0-9]+_[0-9]+]] + +; SI: [[WHILELOOP:BB[0-9]+_[0-9]+]]: ; %while.cond +; SI: s_cbranch_vccz [[FOR_COND_PH:BB[0-9]+_[0-9]+]] ; SI: [[CONVEX_EXIT:BB[0-9_]+]] ; SI: s_mov_b64 vcc, ; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]] -; SI: s_cbranch_vccnz [[INFLOOP]] + +; SI: s_cbranch_vccnz [[WHILELOOP]] ; SI: ; %if.else ; SI: buffer_store_dword -; SI: [[INFLOOP]]: -; SI: s_cbranch_vccnz [[CONVEX_EXIT]] - -; SI: ; %for.cond.preheader +; SI: [[FOR_COND_PH]]: ; %for.cond.preheader ; SI: s_cbranch_vccz [[ENDPGM]] ; SI: [[ENDPGM]]: diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll index 3a9970e78e3..718b1b5238f 100644 --- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose -disable-block-placement < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index 41ff30b4fdd..b827668950b 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -650,12 +650,15 @@ main_body: ; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0 ; CHECK-DAG: s_mov_b32 [[SEVEN:s[0-9]+]], 0x40e00000 -; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body -; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]] +; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %loop ; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]] -; CHECK: s_cbranch_vccz [[LOOPHDR]] -; CHECK: ; %break +; CHECK: s_cbranch_vccnz +; CHECK: ; %body +; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]] +; CHECK: s_branch [[LOOPHDR]] + +; CHECK: ; %break ; CHECK: ; return define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind { entry: |