author    Matt Arsenault <Matthew.Arsenault@amd.com>    2016-08-22 19:33:16 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com>    2016-08-22 19:33:16 +0000
commit    78fc9daf8d1825d32f170f8e60f9158550f93e93 (patch)
tree      0fabfdfbe326321516366f98a99cf7de6c0b2e38 /llvm/test
parent    88d7da01ca7af18ed6bd446d388999bf9668a3cf (diff)
AMDGPU: Split SILowerControlFlow into two pieces
Do most of the lowering in a pre-RA pass. Keep the skip jump insertion late, plus a few other things that require more work to move out.

One concern I have is that there may now be COPY instructions which do not have the necessary implicit exec uses if they are lowered to v_mov_b32.

This has a positive effect on SGPR usage in shader-db.

llvm-svn: 279464
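As an aid to reading the updated CHECK lines below, here is a minimal sketch (not part of the patch) of the structured-branch lowering they exercise. The function name @divergent_if_sketch, the register numbers, and the block labels are hypothetical; the commented assembly mirrors the s_and_saveexec_b64 / s_xor_b64 / mask-branch / s_or_b64 pattern the tests match.

; Hypothetical divergent if, similar in shape to @test_if in valu-i1.ll.
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

define void @divergent_if_sketch(i32 addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %cmp = icmp sgt i32 %tid, 0
  br i1 %cmp, label %if, label %end

if:                                              ; only lanes with %tid > 0
  store i32 1, i32 addrspace(1)* %out, align 4
  br label %end

end:
  ret void
}

; Roughly the exec-mask sequence emitted for the branch (illustrative
; registers and labels, with %tid assumed to live in v0):
;   v_cmp_lt_i32_e32   vcc, 0, v0              ; per-lane compare: 0 < tid
;   s_and_saveexec_b64 s[0:1], vcc             ; save exec, keep only taken lanes
;   s_xor_b64          s[0:1], exec, s[0:1]    ; mask of lanes that skip the body
;   s_cbranch_execz    BB0_2                   ; jump past the body if no lanes remain
;     ...                                      ; %if body runs under the restricted exec
; BB0_2:
;   s_or_b64           exec, exec, s[0:1]      ; re-enable skipped lanes at the join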
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/else.ll     |  8
-rw-r--r--  llvm/test/CodeGen/AMDGPU/valu-i1.ll  | 41
2 files changed, 37 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/else.ll b/llvm/test/CodeGen/AMDGPU/else.ll
index bb885ac3884..ef1e64763d4 100644
--- a/llvm/test/CodeGen/AMDGPU/else.ll
+++ b/llvm/test/CodeGen/AMDGPU/else.ll
@@ -25,11 +25,13 @@ end:
}
; CHECK-LABEL: {{^}}else_execfix_leave_wqm:
+; CHECK: ; BB#0:
+; CHECK-NEXT: s_mov_b64 [[INIT_EXEC:s\[[0-9]+:[0-9]+\]]], exec
; CHECK: ; %Flow
; CHECK-NEXT: s_or_saveexec_b64 [[DST:s\[[0-9]+:[0-9]+\]]],
-; CHECK-NEXT: s_and_b64 exec, exec,
-; CHECK-NEXT: s_and_b64 [[DST]], exec, [[DST]]
-; CHECK-NEXT: s_xor_b64 exec, exec, [[DST]]
+; CHECK-NEXT: s_and_b64 exec, exec, [[INIT_EXEC]]
+; CHECK-NEXT: s_and_b64 [[AND_INIT:s\[[0-9]+:[0-9]+\]]], exec, [[DST]]
+; CHECK-NEXT: s_xor_b64 exec, exec, [[AND_INIT]]
; CHECK-NEXT: ; mask branch
define amdgpu_ps void @else_execfix_leave_wqm(i32 %z, float %v) {
main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 35e06fac89b..c1f8d5916ae 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -2,11 +2,33 @@
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-; SI-LABEL: @test_if
+; SI-LABEL: {{^}}test_if:
; Make sure the i1 values created by the cfg structurizer pass are
; moved using VALU instructions
+
+
+; waitcnt should be inserted after exec modification
+; SI: v_cmp_lt_i32_e32 vcc, 0,
+; SI-NEXT: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc
+; SI-NEXT: s_xor_b64 [[SAVE]], exec, [[SAVE]]
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: ; mask branch [[FLOW_BB:BB[0-9]+_[0-9]+]]
+; SI-NEXT: s_cbranch_execz [[FLOW_BB]]
+
+; SI-NEXT: BB{{[0-9]+}}_1: ; %LeafBlock3
; SI-NOT: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
; SI: v_mov_b32_e32 v{{[0-9]}}, -1
+; SI: s_and_saveexec_b64
+; SI-NEXT: s_xor_b64
+; SI-NEXT: ; mask branch
+
+; v_mov should be after exec modification
+; SI: [[FLOW_BB]]:
+; SI-NEXT: s_or_saveexec_b64 [[SAVE]], [[SAVE]]
+; SI-NEXT: v_mov_b32_e32 v{{[0-9]+}}
+; SI-NEXT: s_xor_b64 exec, exec, [[SAVE]]
+; SI-NEXT: ; mask branch
+;
define void @test_if(i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -17,12 +39,12 @@ entry:
case0:
%arrayidx1 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
- store i32 0, i32 addrspace(1)* %arrayidx1, align 4
+ store i32 13, i32 addrspace(1)* %arrayidx1, align 4
br label %end
case1:
%arrayidx5 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
- store i32 1, i32 addrspace(1)* %arrayidx5, align 4
+ store i32 17, i32 addrspace(1)* %arrayidx5, align 4
br label %end
default:
@@ -31,11 +53,11 @@ default:
br i1 %cmp8, label %if, label %else
if:
- store i32 2, i32 addrspace(1)* %arrayidx10, align 4
+ store i32 19, i32 addrspace(1)* %arrayidx10, align 4
br label %end
else:
- store i32 3, i32 addrspace(1)* %arrayidx10, align 4
+ store i32 21, i32 addrspace(1)* %arrayidx10, align 4
br label %end
end:
@@ -139,10 +161,11 @@ exit:
; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]]
; SI: [[LABEL_FLOW]]:
-; SI: s_or_b64 exec, exec, [[ORNEG2]]
-; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[TMP]]
-; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
-; SI: s_cbranch_execnz [[LABEL_LOOP]]
+; SI-NEXT: ; in Loop: Header=[[LABEL_LOOP]]
+; SI-NEXT: s_or_b64 exec, exec, [[ORNEG2]]
+; SI-NEXT: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[TMP]]
+; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]]
+; SI-NEXT: s_cbranch_execnz [[LABEL_LOOP]]
; SI: BB#5
; SI: s_or_b64 exec, exec, [[COND_STATE]]