diff options
author | vpykhtin <valery.pykhtin@gmail.com> | 2019-11-18 20:06:48 +0300 |
---|---|---|
committer | vpykhtin <valery.pykhtin@gmail.com> | 2019-11-26 18:59:37 +0300 |
commit | 008e65a7bfb320bf197a04ff6427da84f8d38b76 (patch) | |
tree | c461bfbf3ac9dcc0e0dea04f97f46bf3e71a35b8 /llvm/test | |
parent | a913e872d6e7044ae77e55c45ab3ea5304eb7262 (diff) | |
download | bcm5719-llvm-008e65a7bfb320bf197a04ff6427da84f8d38b76.tar.gz bcm5719-llvm-008e65a7bfb320bf197a04ff6427da84f8d38b76.zip |
[AMDGPU] Fix emitIfBreak CF lowering: use a temp reg to make the register coalescer's life easier.
Differential revision: https://reviews.llvm.org/D70405
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll | 26 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/loop_break.ll | 7 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/multilevel-break.ll | 28 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/valu-i1.ll | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/wave32.ll | 14 |
7 files changed, 40 insertions, 48 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll index 0dec67ad340..895539c00bc 100644 --- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -16,29 +16,28 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x ; CHECK-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: s_mov_b64 s[2:3], 0 ; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7 -; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3 +; CHECK-NEXT: ; implicit-def: $sgpr4_sgpr5 ; CHECK-NEXT: s_branch BB0_3 ; CHECK-NEXT: BB0_1: ; %Flow1 ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] -; CHECK-NEXT: s_mov_b64 s[10:11], 0 +; CHECK-NEXT: s_mov_b64 s[8:9], 0 ; CHECK-NEXT: BB0_2: ; %Flow ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7] -; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5] -; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; CHECK-NEXT: s_and_b64 s[4:5], s[10:11], exec -; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] -; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9] -; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7] +; CHECK-NEXT: s_or_b64 s[2:3], s[10:11], s[2:3] +; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], exec +; CHECK-NEXT: s_and_b64 s[8:9], s[8:9], exec +; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; CHECK-NEXT: s_andn2_b64 exec, exec, s[2:3] ; CHECK-NEXT: s_cbranch_execz BB0_6 ; CHECK-NEXT: BB0_3: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec ; CHECK-NEXT: s_cmp_lt_u32 s0, 32 -; CHECK-NEXT: s_mov_b64 s[10:11], -1 +; CHECK-NEXT: s_mov_b64 s[8:9], -1 ; CHECK-NEXT: s_cbranch_scc0 BB0_2 ; CHECK-NEXT: ; %bb.4: ; %endif1 ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 @@ -53,9 +52,9 
@@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1 ; CHECK-NEXT: s_branch BB0_1 ; CHECK-NEXT: BB0_6: ; %Flow2 -; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_or_b64 exec, exec, s[2:3] ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3] +; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[4:5] ; CHECK-NEXT: ; mask branch BB0_8 ; CHECK-NEXT: BB0_7: ; %if1 ; CHECK-NEXT: v_sqrt_f32_e32 v1, v0 @@ -63,6 +62,7 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm ; CHECK-NEXT: s_endpgm + ; this is the divergent branch with the condition not marked as divergent start: %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0) diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll index fff1c22918e..51d1c091ab9 100644 --- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll @@ -3,11 +3,10 @@ ; SI-LABEL: {{^}}i1_copy_from_loop: ; -; SI: [[LOOP:BB0_[0-9]+]]: ; %Flow1 -; SI: s_or_b64 exec, exec, [[EXIT_MASK:s\[[0-9]+:[0-9]+\]]] ; SI: ; %Flow +; SI: s_or_b64 [[EXIT_MASK:s\[[0-9]+:[0-9]+\]]] ; SI: s_and_b64 [[ACCUM_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_MASK:s\[[0-9]+:[0-9]+\]]], exec -; SI: s_or_b64 [[I1_VALUE:s\[[0-9]+:[0-9]+\]]], s[6:7], [[ACCUM_MASK]] +; SI: s_or_b64 [[I1_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[ACCUM_MASK]] ; SI: s_cbranch_execz [[FOR_END_LABEL:BB0_[0-9]+]] ; SI: ; %for.body diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll index 46c4b1e6b3a..684b183de69 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_break.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll @@ -40,10 +40,9 @@ ; GCN: [[FLOW]]: ; %Flow ; GCN: ; in Loop: Header=BB0_1 Depth=1 -; GCN: s_and_b64 [[BROKEN_MASK]], exec, [[INNER_MASK]] -; GCN: s_or_b64 [[BROKEN_MASK]], [[BROKEN_MASK]], [[ACCUM_MASK]] -; 
GCN: s_mov_b64 [[ACCUM_MASK]], [[BROKEN_MASK]] -; GCN: s_andn2_b64 exec, exec, [[BROKEN_MASK]] +; GCN: s_and_b64 [[AND_MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]] +; GCN-NEXT: s_or_b64 [[ACCUM_MASK]], [[AND_MASK]], [[ACCUM_MASK]] +; GCN-NEXT: s_andn2_b64 exec, exec, [[ACCUM_MASK]] ; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]] ; GCN: ; %bb.4: ; %bb9 diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll index 08d8ec0fba4..5222ae56db8 100644 --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -25,22 +25,20 @@ ; GCN: s_mov_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}} ; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2 -; GCN: s_or_b64 exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 exec, exec, [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]] ; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]] -; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]] -; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]] -; GCN: s_andn2_b64 exec, exec, [[TMP1]] +; GCN: s_or_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], [[TMP1]], [[LEFT_OUTER]] +; GCN: s_andn2_b64 exec, exec, [[LEFT_OUTER]] ; GCN: s_cbranch_execz [[IF_BLOCK:BB[0-9]+_[0-9]+]] ; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}} -; GCN: s_mov_b64 [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: s_mov_b64 [[LEFT_INNER]], 0{{$}} ; GCN: ; %Flow ; GCN: s_or_b64 exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]] -; GCN: s_and_b64 [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]] -; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]] -; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]] -; GCN: s_andn2_b64 exec, exec, [[TMP0]] +; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 [[LEFT_INNER]], [[TMP0]], [[LEFT_INNER]] +; GCN: s_andn2_b64 exec, exec, [[LEFT_INNER]] ; GCN: s_cbranch_execz [[FLOW2]] ; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}} @@ -82,17 +80,17 @@ ENDIF: ; preds = %LOOP 
; OPT: llvm.amdgcn.end.cf ; GCN-LABEL: {{^}}multi_if_break_loop: -; GCN: s_mov_b64 [[BROKEN_THREADS_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: s_mov_b64 [[SAVED_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}} ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %Flow4 -; GCN: s_and_b64 [[BROKEN_THREADS_MASK]], exec, [[BROKEN_THREADS_MASK]] -; GCN: s_or_b64 [[BROKEN_THREADS_MASK]], [[BROKEN_THREADS_MASK]], [[SAVED:s\[[0-9]+:[0-9]+\]]] -; GCN: s_andn2_b64 exec, exec, [[BROKEN_THREADS_MASK]] +; GCN: s_and_b64 [[ANDTMP0:s\[[0-9]+:[0-9]+\]]], exec, {{s\[[0-9]+:[0-9]+\]}} +; GCN: s_or_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], [[ANDTMP0]], [[SAVED_MASK]] +; GCN: s_and_b64 [[BROKEN_THREADS_MASK:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, exec +; GCN: s_andn2_b64 exec, exec, [[MASK1]] ; GCN-NEXT: s_cbranch_execz [[LOOP_EXIT:BB[0-9]+_[0-9]+]] ; GCN: ; %bb1{{$}} ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], -; GCN: s_mov_b64 [[SAVED]], [[BROKEN_THREADS_MASK]] ; GCN: ; %LeafBlock1 ; GCN: v_cmp_eq_u32_e32 vcc, 1, [[LOAD0]] @@ -122,7 +120,7 @@ ENDIF: ; preds = %LOOP ; GCN: s_branch [[LOOP]] ; GCN: [[LOOP_EXIT]]: ; %Flow6 -; GCN: s_or_b64 exec, exec, [[BROKEN_THREADS_MASK]] +; GCN: s_or_b64 exec, exec, [[SAVED_MASK]] define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 { bb: diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll index 14d78fbef29..23bb18e738f 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -37,9 +37,8 @@ ENDIF: ; SI: ; %endif ; SI: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]: ; %loop -; SI: s_mov_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[LEFT]] ; SI: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[PHI]] -; SI: s_or_b64 [[LEFT]], [[TMP1]], [[TMP]] +; SI: s_or_b64 [[LEFT]], [[TMP1]], [[LEFT]] ; SI: s_andn2_b64 exec, exec, [[LEFT]] ; SI: s_cbranch_execnz [[LOOP_LABEL]] ; SI: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll index ef17825024e..ea74268dbe7 100644 --- 
a/llvm/test/CodeGen/AMDGPU/valu-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll @@ -223,9 +223,8 @@ exit: ; SI-NEXT: ; in Loop: Header=[[LABEL_LOOP]] ; SI-NEXT: s_or_b64 exec, exec, [[ORNEG2]] ; SI-NEXT: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], -; SI-NEXT: s_or_b64 [[TMP2:s\[[0-9]+:[0-9]+\]]], [[TMP1]], [[COND_STATE]] -; SI-NEXT: s_mov_b64 [[COND_STATE]], [[TMP2]] -; SI-NEXT: s_andn2_b64 exec, exec, [[TMP2]] +; SI-NEXT: s_or_b64 [[COND_STATE]], [[TMP1]], [[COND_STATE]] +; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]] ; SI-NEXT: s_cbranch_execnz [[LABEL_LOOP]] ; SI: [[LABEL_EXIT]]: diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index 91a99318197..92808fec360 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -243,14 +243,12 @@ bb13: ; GFX1032: s_or_b32 [[MASK1]], [[MASK1]], [[MASK0]] ; GFX1064: s_or_b64 [[MASK1]], [[MASK1]], [[MASK0]] ; GCN: BB{{.*}}: ; %Flow -; GFX1032: s_and_b32 [[MASK0:s[0-9]+]], exec_lo, [[MASK1]] -; GFX1064: s_and_b64 [[MASK0:s\[[0-9:]+\]]], exec, [[MASK1]] -; GFX1032: s_or_b32 [[MASK0]], [[MASK0]], [[ACC:s[0-9]+]] -; GFX1064: s_or_b64 [[MASK0]], [[MASK0]], [[ACC:s\[[0-9:]+\]]] -; GFX1032: s_mov_b32 [[ACC]], [[MASK0]] -; GFX1064: s_mov_b64 [[ACC]], [[MASK0]] -; GFX1032: s_andn2_b32 exec_lo, exec_lo, [[MASK0]] -; GFX1064: s_andn2_b64 exec, exec, [[MASK0]] +; GFX1032: s_and_b32 [[TMP0:s[0-9]+]], exec_lo, [[MASK1]] +; GFX1064: s_and_b64 [[TMP0:s\[[0-9:]+\]]], exec, [[MASK1]] +; GFX1032: s_or_b32 [[ACC:s[0-9]+]], [[TMP0]], [[ACC]] +; GFX1064: s_or_b64 [[ACC:s\[[0-9:]+\]]], [[TMP0]], [[ACC]] +; GFX1032: s_andn2_b32 exec_lo, exec_lo, [[ACC]] +; GFX1064: s_andn2_b64 exec, exec, [[ACC]] ; GCN: s_cbranch_execz ; GCN: BB{{.*}}: ; GCN: s_load_dword [[LOAD:s[0-9]+]] |