summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInsertSkips.cpp            |   3
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp              |   4
-rw-r--r--  llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir | 194
3 files changed, 198 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index fd0951e7545..d7a7b79e68a 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -109,9 +109,6 @@ static bool opcodeEmitsNoInsts(unsigned Opc) {
bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const {
- if (From.succ_empty())
- return false;
-
unsigned NumInstr = 0;
const MachineFunction *MF = From.getParent();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0b489b090cc..0d23323c4fb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2479,6 +2479,10 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
if (MI.mayStore() && isSMRD(MI))
return true; // scalar store or atomic
+ // This will terminate the function when other lanes may need to continue.
+ if (MI.isReturn())
+ return true;
+
// These instructions cause shader I/O that may cause hardware lockups
// when executed with an empty EXEC mask.
//
diff --git a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
new file mode 100644
index 00000000000..554094cf9c0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
@@ -0,0 +1,194 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-insert-skips -amdgpu-skip-threshold=1000000 -o - %s | FileCheck %s
+
+---
+name: skip_branch_taildup_endpgm
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; CHECK-LABEL: name: skip_branch_taildup_endpgm
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+ ; CHECK: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+ ; CHECK: S_WAITCNT 127
+ ; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
+ ; CHECK: renamable $vgpr0 = V_ADD_I32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; CHECK: renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec
+ ; CHECK: renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+ ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+ ; CHECK: S_WAITCNT 112
+ ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; CHECK: $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
+ ; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
+ ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
+ ; CHECK: S_BRANCH %bb.3
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
+ ; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+ ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
+ ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK: S_BRANCH %bb.4
+ ; CHECK: bb.2:
+ ; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+ ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+ ; CHECK: S_ENDPGM 0
+ ; CHECK: bb.3:
+ ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
+ ; CHECK: renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ ; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ ; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+ ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
+ ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK: bb.4:
+ ; CHECK: renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ ; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec
+ ; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+ ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $vgpr0, $sgpr4_sgpr5, $sgpr7
+
+ renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+ renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+ S_WAITCNT 127
+ $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
+ renamable $vgpr0 = V_ADD_I32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec
+ renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+ renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+ S_WAITCNT 112
+ V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+ renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
+ SI_MASK_BRANCH %bb.2, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.2:
+ successors: %bb.3, %bb.4
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+ renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+ $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+ SI_MASK_BRANCH %bb.4, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.4:
+ liveins: $sgpr2_sgpr3
+
+ $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+ renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+ S_ENDPGM 0
+
+ bb.1:
+ successors: %bb.3, %bb.4
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+ renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+ $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+ SI_MASK_BRANCH %bb.4, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+ renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec
+ $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+ renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+ S_ENDPGM 0
+
+...
+
+---
+name: skip_branch_taildup_ret
+body: |
+ ; CHECK-LABEL: name: skip_branch_taildup_ret
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK: S_WAITCNT 0
+ ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; CHECK: $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
+ ; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
+ ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
+ ; CHECK: S_BRANCH %bb.3
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
+ ; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
+ ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
+ ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK: S_BRANCH %bb.4
+ ; CHECK: bb.2:
+ ; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
+ ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+ ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31
+ ; CHECK: bb.3:
+ ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
+ ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
+ ; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
+ ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
+ ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK: bb.4:
+ ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec
+ ; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
+ ; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+ ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $vgpr0, $sgpr30_sgpr31, $vgpr1_vgpr2
+
+ S_WAITCNT 0
+ V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+ renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
+ SI_MASK_BRANCH %bb.2, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.2:
+ successors: %bb.3, %bb.4
+ liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2
+
+ renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+ $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
+ SI_MASK_BRANCH %bb.4, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.4:
+ liveins: $sgpr6_sgpr7, $sgpr30_sgpr31
+
+ $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
+ renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+ S_SETPC_B64_return $sgpr30_sgpr31
+
+ bb.1:
+ successors: %bb.3, %bb.4
+ liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2
+
+ renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
+ renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+ $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
+ SI_MASK_BRANCH %bb.4, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2
+
+ renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec
+ $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
+ renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+ S_SETPC_B64_return $sgpr30_sgpr31
+
+...
OpenPOWER on IntegriCloud