diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp | 158 |
1 files changed, 158 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp new file mode 100644 index 00000000000..51779e97ac6 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp @@ -0,0 +1,158 @@ +//===-- SIRemoveShortExecBranches.cpp ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass optmizes the s_cbranch_execz instructions. +/// The pass removes this skip instruction for short branches, +/// if there is no unwanted sideeffect in the fallthrough code sequence. +/// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIInstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +#define DEBUG_TYPE "si-remove-short-exec-branches" + +static unsigned SkipThreshold; + +static cl::opt<unsigned, true> SkipThresholdFlag( + "amdgpu-skip-threshold", cl::Hidden, + cl::desc( + "Number of instructions before jumping over divergent control flow"), + cl::location(SkipThreshold), cl::init(12)); + +namespace { + +class SIRemoveShortExecBranches : public MachineFunctionPass { +private: + const SIInstrInfo *TII = nullptr; + bool getBlockDestinations(MachineBasicBlock &SrcMBB, + MachineBasicBlock *&TrueMBB, + MachineBasicBlock *&FalseMBB, + SmallVectorImpl<MachineOperand> &Cond); + bool mustRetainExeczBranch(const MachineBasicBlock &From, + const MachineBasicBlock &To) const; + bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB); + +public: + static char ID; + + SIRemoveShortExecBranches() : MachineFunctionPass(ID) { + initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // End anonymous namespace. + +INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE, + "SI remove short exec branches", false, false) + +char SIRemoveShortExecBranches::ID = 0; + +char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID; + +bool SIRemoveShortExecBranches::getBlockDestinations( + MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB, + MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) { + if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond)) + return false; + + if (!FalseMBB) + FalseMBB = SrcMBB.getNextNode(); + + return true; +} + +bool SIRemoveShortExecBranches::mustRetainExeczBranch( + const MachineBasicBlock &From, const MachineBasicBlock &To) const { + unsigned NumInstr = 0; + const MachineFunction *MF = From.getParent(); + + for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end(); + MBBI != End && MBBI != ToI; ++MBBI) { + const MachineBasicBlock &MBB = *MBBI; + + for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + // When a uniform loop is inside non-uniform control flow, the branch + // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken + // when EXEC = 0. We should skip the loop lest it becomes infinite. + if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ || + I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ) + return true; + + if (TII->hasUnwantedEffectsWhenEXECEmpty(*I)) + return true; + + // These instructions are potentially expensive even if EXEC = 0. + if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) || + I->getOpcode() == AMDGPU::S_WAITCNT) + return true; + + ++NumInstr; + if (NumInstr >= SkipThreshold) + return true; + } + } + + return false; +} + +// Returns true if the skip branch instruction is removed. +bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI, + MachineBasicBlock &SrcMBB) { + MachineBasicBlock *TrueMBB = nullptr; + MachineBasicBlock *FalseMBB = nullptr; + SmallVector<MachineOperand, 1> Cond; + + if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond)) + return false; + + // Consider only the forward branches. + if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) || + mustRetainExeczBranch(*FalseMBB, *TrueMBB)) + return false; + + LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI); + MI.eraseFromParent(); + SrcMBB.removeSuccessor(TrueMBB); + + return true; +} + +bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) { + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + TII = ST.getInstrInfo(); + MF.RenumberBlocks(); + bool Changed = false; + + for (MachineBasicBlock &MBB : MF) { + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + if (MBBI == MBB.end()) + continue; + + MachineInstr &MI = *MBBI; + switch (MI.getOpcode()) { + case AMDGPU::S_CBRANCH_EXECZ: + Changed = removeExeczBranch(MI, MBB); + break; + default: + break; + } + } + + return Changed; +} |