author:    Matt Arsenault <Matthew.Arsenault@amd.com>  2016-08-22 19:33:16 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com>  2016-08-22 19:33:16 +0000
commit:    78fc9daf8d1825d32f170f8e60f9158550f93e93
tree:      0fabfdfbe326321516366f98a99cf7de6c0b2e38
parent:    88d7da01ca7af18ed6bd446d388999bf9668a3cf
AMDGPU: Split SILowerControlFlow into two pieces
Do most of the lowering in a pre-RA pass. Keep the skip-jump
insertion late, along with a few other pieces that require
more work to move out.
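As a rough illustration of that split (a minimal sketch, not the actual AMDGPU pass setup; the class ExamplePassConfig and the pass identifiers LowerControlFlowID and InsertSkipsID are placeholders), a target's TargetPassConfig can register the bulk of the lowering before register allocation while keeping the skip-jump piece in a pre-emit hook:

// Sketch only: one pass wired pre-RA, another pre-emit, via TargetPassConfig.
// The pass identifiers below are illustrative placeholders, not real AMDGPU symbols.
#include "llvm/CodeGen/TargetPassConfig.h"

using namespace llvm;

extern char &LowerControlFlowID; // placeholder: pre-RA control-flow lowering
extern char &InsertSkipsID;      // placeholder: late skip-jump insertion

class ExamplePassConfig : public TargetPassConfig {
public:
  using TargetPassConfig::TargetPassConfig;

  void addPreRegAlloc() override {
    // Most of the exec-mask lowering runs before register allocation,
    // so the allocator sees the real SGPR requirements.
    addPass(&LowerControlFlowID);
  }

  void addPreEmitPass() override {
    // Skip-jump insertion stays late, after allocation and scheduling.
    addPass(&InsertSkipsID);
  }
};

The point of the sketch is only the pipeline placement: one piece runs before the allocator, the other just before emission.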
One concern I have is that there may now be COPY instructions
which do not have the necessary implicit exec uses if they
will be lowered to v_mov_b32.
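To make the concern concrete: a COPY that ends up as a v_mov_b32 writes per-lane under the exec mask, so the lowered instruction needs an implicit exec read. A minimal sketch of the kind of fixup this would call for, assuming a hypothetical helper (addImplicitExecUse is made up here, and the decision of which COPYs qualify is left out):

// Sketch only: give a machine instruction the implicit EXEC read it would be
// missing if it started life as a plain COPY. The helper and its call site
// are hypothetical; only standard CodeGen APIs are used.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

// ExecReg would be the target's exec-mask register (e.g. AMDGPU's EXEC).
static void addImplicitExecUse(MachineInstr &MI, unsigned ExecReg,
                               const TargetRegisterInfo *TRI) {
  // Nothing to do if the instruction already reads the exec mask.
  if (MI.readsRegister(ExecReg, TRI))
    return;
  // Attach exec as an implicit use: isDef = false, isImp = true.
  MI.addOperand(MachineOperand::CreateReg(ExecReg, /*isDef=*/false,
                                          /*isImp=*/true));
}

Whether anything like this is needed depends on whether the later COPY expansion adds the implicit operand itself, which is exactly the open question above.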
This has a positive effect on SGPR usage in shader-db.
llvm-svn: 279464
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/else.ll    |  8
-rw-r--r--  llvm/test/CodeGen/AMDGPU/valu-i1.ll | 41
2 files changed, 37 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/else.ll b/llvm/test/CodeGen/AMDGPU/else.ll
index bb885ac3884..ef1e64763d4 100644
--- a/llvm/test/CodeGen/AMDGPU/else.ll
+++ b/llvm/test/CodeGen/AMDGPU/else.ll
@@ -25,11 +25,13 @@ end:
 }
 
 ; CHECK-LABEL: {{^}}else_execfix_leave_wqm:
+; CHECK: ; BB#0:
+; CHECK-NEXT: s_mov_b64 [[INIT_EXEC:s\[[0-9]+:[0-9]+\]]], exec
 ; CHECK: ; %Flow
 ; CHECK-NEXT: s_or_saveexec_b64 [[DST:s\[[0-9]+:[0-9]+\]]],
-; CHECK-NEXT: s_and_b64 exec, exec,
-; CHECK-NEXT: s_and_b64 [[DST]], exec, [[DST]]
-; CHECK-NEXT: s_xor_b64 exec, exec, [[DST]]
+; CHECK-NEXT: s_and_b64 exec, exec, [[INIT_EXEC]]
+; CHECK-NEXT: s_and_b64 [[AND_INIT:s\[[0-9]+:[0-9]+\]]], exec, [[DST]]
+; CHECK-NEXT: s_xor_b64 exec, exec, [[AND_INIT]]
 ; CHECK-NEXT: ; mask branch
 define amdgpu_ps void @else_execfix_leave_wqm(i32 %z, float %v) {
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 35e06fac89b..c1f8d5916ae 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -2,11 +2,33 @@
 
 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
-; SI-LABEL: @test_if
+; SI-LABEL: {{^}}test_if:
 ; Make sure the i1 values created by the cfg structurizer pass are
 ; moved using VALU instructions
+
+
+; waitcnt should be inserted after exec modification
+; SI: v_cmp_lt_i32_e32 vcc, 0,
+; SI-NEXT: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc
+; SI-NEXT: s_xor_b64 [[SAVE]], exec, [[SAVE]]
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: ; mask branch [[FLOW_BB:BB[0-9]+_[0-9]+]]
+; SI-NEXT: s_cbranch_execz [[FLOW_BB]]
+
+; SI-NEXT: BB{{[0-9]+}}_1: ; %LeafBlock3
 ; SI-NOT: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
 ; SI: v_mov_b32_e32 v{{[0-9]}}, -1
+; SI: s_and_saveexec_b64
+; SI-NEXT: s_xor_b64
+; SI-NEXT: ; mask branch
+
+; v_mov should be after exec modification
+; SI: [[FLOW_BB]]:
+; SI-NEXT: s_or_saveexec_b64 [[SAVE]], [[SAVE]]
+; SI-NEXT: v_mov_b32_e32 v{{[0-9]+}}
+; SI-NEXT: s_xor_b64 exec, exec, [[SAVE]]
+; SI-NEXT: ; mask branch
+;
 define void @test_if(i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -17,12 +39,12 @@ entry:
 
 case0:
   %arrayidx1 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
-  store i32 0, i32 addrspace(1)* %arrayidx1, align 4
+  store i32 13, i32 addrspace(1)* %arrayidx1, align 4
   br label %end
 
 case1:
   %arrayidx5 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
-  store i32 1, i32 addrspace(1)* %arrayidx5, align 4
+  store i32 17, i32 addrspace(1)* %arrayidx5, align 4
   br label %end
 
 default:
@@ -31,11 +53,11 @@ default:
   br i1 %cmp8, label %if, label %else
 
 if:
-  store i32 2, i32 addrspace(1)* %arrayidx10, align 4
+  store i32 19, i32 addrspace(1)* %arrayidx10, align 4
   br label %end
 
 else:
-  store i32 3, i32 addrspace(1)* %arrayidx10, align 4
+  store i32 21, i32 addrspace(1)* %arrayidx10, align 4
   br label %end
 
 end:
@@ -139,10 +161,11 @@ exit:
 ; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]]
 
 ; SI: [[LABEL_FLOW]]:
-; SI: s_or_b64 exec, exec, [[ORNEG2]]
-; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[TMP]]
-; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
-; SI: s_cbranch_execnz [[LABEL_LOOP]]
+; SI-NEXT: ; in Loop: Header=[[LABEL_LOOP]]
+; SI-NEXT: s_or_b64 exec, exec, [[ORNEG2]]
+; SI-NEXT: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[TMP]]
+; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]]
+; SI-NEXT: s_cbranch_execnz [[LABEL_LOOP]]
 
 ; SI: BB#5
 ; SI: s_or_b64 exec, exec, [[COND_STATE]]