summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r--llvm/test/CodeGen/AMDGPU/branch-relaxation.ll16
-rw-r--r--llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll5
-rw-r--r--llvm/test/CodeGen/AMDGPU/skip-if-dead.ll17
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-cfg.ll32
4 files changed, 51 insertions, 19 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index 39505404a86..3fd40521801 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -335,6 +335,12 @@ loop:
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: [[BB3]]: ; %bb3
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: v_nop_e64
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: v_nop_e64
+; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
define void @expand_requires_expand(i32 %cond0) #0 {
bb0:
@@ -356,6 +362,12 @@ bb2:
br label %bb3
bb3:
+; These NOPs prevent tail-duplication-based outlining
+; from firing, which defeats the need to expand the branches and this test.
+ call void asm sideeffect
+ "v_nop_e64", ""() #0
+ call void asm sideeffect
+ "v_nop_e64", ""() #0
ret void
}
@@ -385,6 +397,7 @@ bb3:
; GCN-NEXT: [[ENDIF]]: ; %endif
; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
+; GCN-NEXT: s_sleep 5
; GCN-NEXT: s_endpgm
define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) #0 {
entry:
@@ -402,6 +415,9 @@ if_uniform:
br label %endif
endif:
+ ; layout can remove the split branch if it can copy the return block.
+ ; This call makes the return block long enough that it doesn't get copied.
+ call void @llvm.amdgcn.s.sleep(i32 5);
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
index ef616eb6380..574f9a64e57 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
@@ -37,7 +37,10 @@ bb5: ; preds = %bb3, %bb1
; OPT-NOT: call i1 @llvm.amdgcn.loop
; GCN-LABEL: {{^}}annotate_ret_noloop:
-; GCN: s_cbranch_scc1
+; GCN: s_cbranch_scc0 [[BODY:BB[0-9]+_[0-9]+]]
+; GCN: s_endpgm
+
+; GCN: {{^}}[[BODY]]:
; GCN: s_endpgm
; GCN: .Lfunc_end1
define void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index 60cee7a3499..23b67534440 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -263,18 +263,25 @@ exit:
; CHECK-NEXT: s_endpgm
; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: s_cbranch_scc0 [[PHIBB:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: s_cbranch_scc1 [[BB8:BB[0-9]+_[0-9]+]]
-; CHECK: [[PHIBB]]:
; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]
-; CHECK-NEXT: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: s_cbranch_vccnz [[BB10:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: s_branch [[END:BB[0-9]+_[0-9]+]]
-; CHECK: ; %bb10
+; CHECK [[BB8]]: ; %BB8
+; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 8
+; CHECK: buffer_store_dword
+; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]
+; CHECK-NEXT: s_cbranch_vccz [[END]]
+
+; CHECK: [[BB10]]: ; %bb10
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9
; CHECK: buffer_store_dword
-; CHECK: [[ENDBB]]:
+; CHECK: [[END:BB[0-9]+_[0-9]+]]: ; %end
; CHECK-NEXT: s_endpgm
+
define amdgpu_ps void @phi_use_def_before_kill() #0 {
bb:
%tmp = fadd float undef, 1.000000e+00
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
index a0060bd368b..c0d0a750b27 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -252,10 +252,12 @@ ENDIF: ; preds = %IF, %main_body
; GCN: s_cmp_lt_i32 [[COND]], 1
; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
; GCN: v_cmp_gt_i32_e64 vcc, [[COND]], 0{{$}}
-; GCN: s_cbranch_vccnz [[EXIT]]
-; GCN: buffer_store
+; GCN: s_cbranch_vccz [[BODY:[A-Za-z0-9_]+]]
; GCN: {{^}}[[EXIT]]:
; GCN: s_endpgm
+; GCN: {{^}}[[BODY]]:
+; GCN: buffer_store
+; GCN: s_endpgm
define void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
@@ -302,9 +304,10 @@ done:
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
-; GCN: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; GCN: s_cmp_lg_u32 {{s[0-9]+}}, 0
-; GCN: s_cbranch_scc1 [[ENDIF_LABEL]]
+; GCN: s_cbranch_scc0 [[IF_UNIFORM_LABEL:[A-Z0-9_a-z]+]]
+; GCN: s_endpgm
+; GCN: {{^}}[[IF_UNIFORM_LABEL]]:
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
@@ -328,14 +331,13 @@ endif:
; GCN-LABEL: {{^}}divergent_inside_uniform:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
-; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
+; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
+; GCN: [[IF_LABEL]]:
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
-; GCN: [[ENDIF_LABEL]]:
-; GCN: s_endpgm
define void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
entry:
%u_cmp = icmp eq i32 %cond, 0
@@ -363,11 +365,11 @@ endif:
; GCN: buffer_store_dword [[ONE]]
; GCN: s_or_b64 exec, exec, [[MASK]]
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
-; GCN: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]]
+; GCN: s_cbranch_scc0 [[IF_UNIFORM:[A-Z0-9_]+]]
+; GCN: s_endpgm
+; GCN: [[IF_UNIFORM]]:
; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; GCN: buffer_store_dword [[TWO]]
-; GCN: [[EXIT]]:
-; GCN: s_endpgm
define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
@@ -398,16 +400,20 @@ exit:
; GCN-LABEL: {{^}}cse_uniform_condition_different_blocks:
; GCN: s_load_dword [[COND:s[0-9]+]]
; GCN: s_cmp_lt_i32 [[COND]], 1
-; GCN: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3
+; GCN: s_cbranch_scc1 [[FN:BB[0-9_]+]]
; GCN: BB#1:
; GCN-NOT: cmp
; GCN: buffer_load_dword
; GCN: buffer_store_dword
-; GCN: s_cbranch_scc1 BB[[FNNUM]]_3
+; GCN: s_cbranch_scc0 [[BB7:BB[0-9_]+]]
-; GCN: BB[[FNNUM]]_3:
+; GCN: [[FN]]:
; GCN: s_endpgm
+
+; GCN: [[BB7]]:
+; GCN: s_endpgm
+
define void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
OpenPOWER on IntegriCloud