summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/CMakeLists.txt1
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertSkips.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp158
6 files changed, 173 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index a55a1747caf..fbed51de0ea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -156,6 +156,9 @@ extern char &SIWholeQuadModeID;
void initializeSILowerControlFlowPass(PassRegistry &);
extern char &SILowerControlFlowID;
+void initializeSIRemoveShortExecBranchesPass(PassRegistry &);
+extern char &SIRemoveShortExecBranchesID;
+
void initializeSIInsertSkipsPass(PassRegistry &);
extern char &SIInsertSkipsPassID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c8dc6f6e3bf..eb30d659bf0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -228,6 +228,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIModeRegisterPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
+ initializeSIRemoveShortExecBranchesPass(*PR);
initializeSIInsertSkipsPass(*PR);
initializeSIMemoryLegalizerPass(*PR);
initializeSIOptimizeExecMaskingPass(*PR);
@@ -993,6 +994,7 @@ void GCNPassConfig::addPreEmitPass() {
// be better for it to emit S_NOP <N> when possible.
addPass(&PostRAHazardRecognizerID);
+ addPass(&SIRemoveShortExecBranchesID);
addPass(&SIInsertSkipsPassID);
addPass(&BranchRelaxationPassID);
}
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 3ed35e57e54..0b8eb4b25ae 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -116,6 +116,7 @@ add_llvm_target(AMDGPUCodeGen
SIOptimizeExecMaskingPreRA.cpp
SIPeepholeSDWA.cpp
SIRegisterInfo.cpp
+ SIRemoveShortExecBranches.cpp
SIShrinkInstructions.cpp
SIWholeQuadMode.cpp
GCNILPSched.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index 87e63fcc4a0..80c044ec00c 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -41,7 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "si-insert-skips"
static cl::opt<unsigned> SkipThresholdFlag(
- "amdgpu-skip-threshold",
+ "amdgpu-skip-threshold-legacy",
cl::desc("Number of instructions before jumping over divergent control flow"),
cl::init(12), cl::Hidden);
@@ -466,6 +466,9 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
+ case AMDGPU::S_CBRANCH_EXECZ:
+ ExecBranchStack.push_back(MI.getOperand(0).getMBB());
+ break;
case AMDGPU::SI_MASK_BRANCH:
ExecBranchStack.push_back(MI.getOperand(0).getMBB());
MadeChange |= skipMaskBranch(MI, MBB);
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index bf052dc3c93..61d2719a3aa 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -244,9 +244,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
.addReg(Tmp, RegState::Kill);
- // Insert a pseudo terminator to help keep the verifier happy. This will also
- // be used later when inserting skips.
- MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
+ // Insert the S_CBRANCH_EXECZ instruction which will be optimized later
+ // during SIRemoveShortExecBranches.
+ MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
.add(MI.getOperand(2));
if (!LIS) {
@@ -323,8 +323,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
.addReg(DstReg);
MachineInstr *Branch =
- BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
- .addMBB(DestBB);
+ BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+ .addMBB(DestBB);
if (!LIS) {
MI.eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
new file mode 100644
index 00000000000..51779e97ac6
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
@@ -0,0 +1,158 @@
+//===-- SIRemoveShortExecBranches.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass optmizes the s_cbranch_execz instructions.
+/// The pass removes this skip instruction for short branches,
+/// if there is no unwanted sideeffect in the fallthrough code sequence.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-remove-short-exec-branches"
+
+static unsigned SkipThreshold;
+
+static cl::opt<unsigned, true> SkipThresholdFlag(
+ "amdgpu-skip-threshold", cl::Hidden,
+ cl::desc(
+ "Number of instructions before jumping over divergent control flow"),
+ cl::location(SkipThreshold), cl::init(12));
+
+namespace {
+
+class SIRemoveShortExecBranches : public MachineFunctionPass {
+private:
+ const SIInstrInfo *TII = nullptr;
+ bool getBlockDestinations(MachineBasicBlock &SrcMBB,
+ MachineBasicBlock *&TrueMBB,
+ MachineBasicBlock *&FalseMBB,
+ SmallVectorImpl<MachineOperand> &Cond);
+ bool mustRetainExeczBranch(const MachineBasicBlock &From,
+ const MachineBasicBlock &To) const;
+ bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
+
+public:
+ static char ID;
+
+ SIRemoveShortExecBranches() : MachineFunctionPass(ID) {
+ initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE,
+ "SI remove short exec branches", false, false)
+
+char SIRemoveShortExecBranches::ID = 0;
+
+char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID;
+
+bool SIRemoveShortExecBranches::getBlockDestinations(
+ MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
+ MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
+ if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
+ return false;
+
+ if (!FalseMBB)
+ FalseMBB = SrcMBB.getNextNode();
+
+ return true;
+}
+
+bool SIRemoveShortExecBranches::mustRetainExeczBranch(
+ const MachineBasicBlock &From, const MachineBasicBlock &To) const {
+ unsigned NumInstr = 0;
+ const MachineFunction *MF = From.getParent();
+
+ for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
+ MBBI != End && MBBI != ToI; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ // When a uniform loop is inside non-uniform control flow, the branch
+ // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
+ // when EXEC = 0. We should skip the loop lest it becomes infinite.
+ if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
+ I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
+ return true;
+
+ if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
+ return true;
+
+ // These instructions are potentially expensive even if EXEC = 0.
+ if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
+ I->getOpcode() == AMDGPU::S_WAITCNT)
+ return true;
+
+ ++NumInstr;
+ if (NumInstr >= SkipThreshold)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Returns true if the skip branch instruction is removed.
+bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI,
+ MachineBasicBlock &SrcMBB) {
+ MachineBasicBlock *TrueMBB = nullptr;
+ MachineBasicBlock *FalseMBB = nullptr;
+ SmallVector<MachineOperand, 1> Cond;
+
+ if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
+ return false;
+
+ // Consider only the forward branches.
+ if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
+ mustRetainExeczBranch(*FalseMBB, *TrueMBB))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
+ MI.eraseFromParent();
+ SrcMBB.removeSuccessor(TrueMBB);
+
+ return true;
+}
+
+bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ TII = ST.getInstrInfo();
+ MF.RenumberBlocks();
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ if (MBBI == MBB.end())
+ continue;
+
+ MachineInstr &MI = *MBBI;
+ switch (MI.getOpcode()) {
+ case AMDGPU::S_CBRANCH_EXECZ:
+ Changed = removeExeczBranch(MI, MBB);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
OpenPOWER on IntegriCloud