author     cdevadas <cdevadas@amd.com>  2020-01-10 22:23:27 +0530
committer  cdevadas <cdevadas@amd.com>  2020-01-15 15:18:16 +0530
commit     0dc6c249bffac9f23a605ce4e42a84341da3ddbd (patch)
tree       113cc776987199087010ef82f8fd4728b06d0c8b /llvm/test/CodeGen/AMDGPU/ret_jump.ll
parent     064859bde79ccd221fd5196fd2d889014c5435c4 (diff)
[AMDGPU] Invert the handling of skip insertion.
The current implementation of skip insertion (SIInsertSkips) makes it a mandatory pass, required for correctness, even though the original idea was for it to be optional. This patch instead inserts the s_cbranch_execz upfront, during SILowerControlFlow, to skip over sections of code when no lanes are active. A later pass, SIRemoveShortExecBranches, then removes the skips around short branches, unless there is a side effect and the skip branch is really necessary. This new pass replaces the skip-insertion handling in the existing SIInsertSkips pass.

Differential Revision: https://reviews.llvm.org/D68092
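In rough terms, the inverted scheme lowers a divergent branch into code shaped like the sketch below. This is a minimal illustration, not output from this test: the registers, block labels, and the ds_write_b32 are placeholders.

  s_and_saveexec_b64 s[0:1], vcc   ; limit exec to the lanes entering the block
  s_cbranch_execz BB0_2            ; inserted upfront by SILowerControlFlow
BB0_1:                             ; %then
  ds_write_b32 v0, v1              ; side effect: the skip must be retained here
BB0_2:                             ; %Flow
  s_or_b64 exec, exec, s[0:1]      ; restore the saved exec mask

When the guarded block is short and has no side effects, SIRemoveShortExecBranches deletes the s_cbranch_execz again, since falling through with an all-zero exec mask is harmless there.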
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/ret_jump.ll')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/ret_jump.ll | 23
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
index ffa851919f7..84749170dc6 100644
--- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
@@ -11,12 +11,11 @@
; GCN-NEXT: ; %else
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]]
-; GCN: BB{{[0-9]+_[0-9]+}}: ; %unreachable.bb
+; GCN: ; %bb.{{[0-9]+}}: ; %unreachable.bb
; GCN-NEXT: ; divergent unreachable
-; GCN-NEXT: {{^}}[[FLOW]]: ; %Flow
+; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %Flow
; GCN-NEXT: s_or_b64 exec, exec
; GCN-NEXT: [[RET_BB]]:
@@ -55,11 +54,17 @@ ret.bb: ; preds = %else, %main_body
}
; GCN-LABEL: {{^}}uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable:
-; GCN: s_cbranch_vccnz [[RET_BB:BB[0-9]+_[0-9]+]]
+; GCN: s_cbranch_vccz
-; GCN: ; %bb.{{[0-9]+}}: ; %else
+; GCN: ; %bb.{{[0-9]+}}: ; %Flow
+; GCN: s_cbranch_execnz [[RETURN:BB[0-9]+_[0-9]+]]
+
+; GCN: ; %UnifiedReturnBlock
+; GCN-NEXT: s_or_b64 exec, exec
+; GCN-NEXT: s_waitcnt
+
+; GCN: BB{{[0-9]+_[0-9]+}}: ; %else
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN-NEXT: ; mask branch [[FLOW1:BB[0-9]+_[0-9]+]]
; GCN-NEXT: ; %unreachable.bb
; GCN: ds_write_b32
@@ -67,12 +72,6 @@ ret.bb: ; preds = %else, %main_body
; GCN: ; %ret.bb
; GCN: store_dword
-
-; GCN: ; %UnifiedReturnBlock
-; GCN-NEXT: s_or_b64 exec, exec
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: ; return
-; GCN-NEXT: .Lfunc_end
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [17 x <8 x i32>] addrspace(4)* inreg %arg2, i32 addrspace(4)* inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
main_body:
%i.i = extractelement <2 x i32> %arg7, i32 0