diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/branch-relaxation.ll | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/skip-if-dead.ll | 17 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/uniform-cfg.ll | 32 |
4 files changed, 51 insertions, 19 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll index 39505404a86..3fd40521801 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll @@ -335,6 +335,12 @@ loop: ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: [[BB3]]: ; %bb3 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: s_endpgm define void @expand_requires_expand(i32 %cond0) #0 { bb0: @@ -356,6 +362,12 @@ bb2: br label %bb3 bb3: +; These NOPs prevent tail-duplication-based outlining +; from firing, which defeats the need to expand the branches and this test. + call void asm sideeffect + "v_nop_e64", ""() #0 + call void asm sideeffect + "v_nop_e64", ""() #0 ret void } @@ -385,6 +397,7 @@ bb3: ; GCN-NEXT: [[ENDIF]]: ; %endif ; GCN-NEXT: s_or_b64 exec, exec, [[MASK]] +; GCN-NEXT: s_sleep 5 ; GCN-NEXT: s_endpgm define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) #0 { entry: @@ -402,6 +415,9 @@ if_uniform: br label %endif endif: + ; layout can remove the split branch if it can copy the return block. + ; This call makes the return block long enough that it doesn't get copied. + call void @llvm.amdgcn.s.sleep(i32 5); ret void } diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll index ef616eb6380..574f9a64e57 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll @@ -37,7 +37,10 @@ bb5: ; preds = %bb3, %bb1 ; OPT-NOT: call i1 @llvm.amdgcn.loop ; GCN-LABEL: {{^}}annotate_ret_noloop: -; GCN: s_cbranch_scc1 +; GCN: s_cbranch_scc0 [[BODY:BB[0-9]+_[0-9]+]] +; GCN: s_endpgm + +; GCN: {{^}}[[BODY]]: ; GCN: s_endpgm ; GCN: .Lfunc_end1 define void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll index 60cee7a3499..23b67534440 100644 --- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -263,18 +263,25 @@ exit: ; CHECK-NEXT: s_endpgm ; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]: -; CHECK-NEXT: s_cbranch_scc0 [[PHIBB:BB[0-9]+_[0-9]+]] +; CHECK-NEXT: s_cbranch_scc1 [[BB8:BB[0-9]+_[0-9]+]] -; CHECK: [[PHIBB]]: ; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]] -; CHECK-NEXT: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]] +; CHECK-NEXT: s_cbranch_vccnz [[BB10:BB[0-9]+_[0-9]+]] +; CHECK-NEXT: s_branch [[END:BB[0-9]+_[0-9]+]] -; CHECK: ; %bb10 +; CHECK [[BB8]]: ; %BB8 +; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 8 +; CHECK: buffer_store_dword +; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]] +; CHECK-NEXT: s_cbranch_vccz [[END]] + +; CHECK: [[BB10]]: ; %bb10 ; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9 ; CHECK: buffer_store_dword -; CHECK: [[ENDBB]]: +; CHECK: [[END:BB[0-9]+_[0-9]+]]: ; %end ; CHECK-NEXT: s_endpgm + define amdgpu_ps void @phi_use_def_before_kill() #0 { bb: %tmp = fadd float undef, 1.000000e+00 diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll index a0060bd368b..c0d0a750b27 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll @@ -252,10 +252,12 @@ ENDIF: ; preds = %IF, %main_body ; GCN: s_cmp_lt_i32 [[COND]], 1 ; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]] ; GCN: v_cmp_gt_i32_e64 vcc, [[COND]], 0{{$}} -; GCN: s_cbranch_vccnz [[EXIT]] -; GCN: buffer_store +; GCN: s_cbranch_vccz [[BODY:[A-Za-z0-9_]+]] ; GCN: {{^}}[[EXIT]]: ; GCN: s_endpgm +; GCN: {{^}}[[BODY]]: +; GCN: buffer_store +; GCN: s_endpgm define void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0 @@ -302,9 +304,10 @@ done: ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}} ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc ; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]] -; GCN: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]] ; GCN: s_cmp_lg_u32 {{s[0-9]+}}, 0 -; GCN: s_cbranch_scc1 [[ENDIF_LABEL]] +; GCN: s_cbranch_scc0 [[IF_UNIFORM_LABEL:[A-Z0-9_a-z]+]] +; GCN: s_endpgm +; GCN: {{^}}[[IF_UNIFORM_LABEL]]: ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: buffer_store_dword [[ONE]] define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) { @@ -328,14 +331,13 @@ endif: ; GCN-LABEL: {{^}}divergent_inside_uniform: ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 -; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]] +; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]] +; GCN: [[IF_LABEL]]: ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}} ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc ; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]] ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: buffer_store_dword [[ONE]] -; GCN: [[ENDIF_LABEL]]: -; GCN: s_endpgm define void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) { entry: %u_cmp = icmp eq i32 %cond, 0 @@ -363,11 +365,11 @@ endif: ; GCN: buffer_store_dword [[ONE]] ; GCN: s_or_b64 exec, exec, [[MASK]] ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 -; GCN: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]] +; GCN: s_cbranch_scc0 [[IF_UNIFORM:[A-Z0-9_]+]] +; GCN: s_endpgm +; GCN: [[IF_UNIFORM]]: ; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN: buffer_store_dword [[TWO]] -; GCN: [[EXIT]]: -; GCN: s_endpgm define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 @@ -398,16 +400,20 @@ exit: ; GCN-LABEL: {{^}}cse_uniform_condition_different_blocks: ; GCN: s_load_dword [[COND:s[0-9]+]] ; GCN: s_cmp_lt_i32 [[COND]], 1 -; GCN: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3 +; GCN: s_cbranch_scc1 [[FN:BB[0-9_]+]] ; GCN: BB#1: ; GCN-NOT: cmp ; GCN: buffer_load_dword ; GCN: buffer_store_dword -; GCN: s_cbranch_scc1 BB[[FNNUM]]_3 +; GCN: s_cbranch_scc0 [[BB7:BB[0-9_]+]] -; GCN: BB[[FNNUM]]_3: +; GCN: [[FN]]: ; GCN: s_endpgm + +; GCN: [[BB7]]: +; GCN: s_endpgm + define void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0 |