author     cdevadas <cdevadas@amd.com>  2020-01-10 22:23:27 +0530
committer  cdevadas <cdevadas@amd.com>  2020-01-15 15:18:16 +0530
commit     0dc6c249bffac9f23a605ce4e42a84341da3ddbd (patch)
tree       113cc776987199087010ef82f8fd4728b06d0c8b /llvm/test/CodeGen/AMDGPU/ret_jump.ll
parent     064859bde79ccd221fd5196fd2d889014c5435c4 (diff)
[AMDGPU] Invert the handling of skip insertion.
The current implementation of skip insertion (SIInsertSkips) makes it a mandatory pass, required for correctness, even though the original idea was for it to be optional. This patch instead inserts the s_cbranch_execz upfront, during SILowerControlFlow, to skip over sections of code when no lanes are active. A later pass, SIRemoveShortExecBranches, then removes the skips around short branches, unless there is a side effect and the skip branch is really necessary. This new pass replaces the skip-insertion handling in the existing SIInsertSkips pass.

Differential Revision: https://reviews.llvm.org/D68092
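In rough terms, the inverted scheme lowers a divergent branch into code shaped like the sketch below. This is a minimal illustration, not output from this test: the registers, block labels, and the ds_write_b32 are placeholders.

  s_and_saveexec_b64 s[0:1], vcc   ; limit exec to the lanes entering the block
  s_cbranch_execz BB0_2            ; inserted upfront by SILowerControlFlow
BB0_1:                             ; %then
  ds_write_b32 v0, v1              ; side effect: the skip must be retained here
BB0_2:                             ; %Flow
  s_or_b64 exec, exec, s[0:1]      ; restore the saved exec mask

When the guarded block is short and has no side effects, SIRemoveShortExecBranches deletes the s_cbranch_execz again, since falling through with an all-zero exec mask is harmless there.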
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/ret_jump.ll')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/ret_jump.ll | 23
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
index ffa851919f7..84749170dc6 100644
--- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
@@ -11,12 +11,11 @@
; GCN-NEXT: ; %else
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]]
-; GCN: BB{{[0-9]+_[0-9]+}}: ; %unreachable.bb
+; GCN: ; %bb.{{[0-9]+}}: ; %unreachable.bb
; GCN-NEXT: ; divergent unreachable
-; GCN-NEXT: {{^}}[[FLOW]]: ; %Flow
+; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %Flow
; GCN-NEXT: s_or_b64 exec, exec
; GCN-NEXT: [[RET_BB]]:
@@ -55,11 +54,17 @@ ret.bb: ; preds = %else, %main_body
}
; GCN-LABEL: {{^}}uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable:
-; GCN: s_cbranch_vccnz [[RET_BB:BB[0-9]+_[0-9]+]]
+; GCN: s_cbranch_vccz
-; GCN: ; %bb.{{[0-9]+}}: ; %else
+; GCN: ; %bb.{{[0-9]+}}: ; %Flow
+; GCN: s_cbranch_execnz [[RETURN:BB[0-9]+_[0-9]+]]
+
+; GCN: ; %UnifiedReturnBlock
+; GCN-NEXT: s_or_b64 exec, exec
+; GCN-NEXT: s_waitcnt
+
+; GCN: BB{{[0-9]+_[0-9]+}}: ; %else
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN-NEXT: ; mask branch [[FLOW1:BB[0-9]+_[0-9]+]]
; GCN-NEXT: ; %unreachable.bb
; GCN: ds_write_b32
@@ -67,12 +72,6 @@ ret.bb: ; preds = %else, %main_body
; GCN: ; %ret.bb
; GCN: store_dword
-
-; GCN: ; %UnifiedReturnBlock
-; GCN-NEXT: s_or_b64 exec, exec
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: ; return
-; GCN-NEXT: .Lfunc_end
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [17 x <8 x i32>] addrspace(4)* inreg %arg2, i32 addrspace(4)* inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
main_body:
%i.i = extractelement <2 x i32> %arg7, i32 0