author    Matt Arsenault <Matthew.Arsenault@amd.com>  2016-10-06 16:20:41 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com>  2016-10-06 16:20:41 +0000
commit    6bc43d8627ca44465e7ce261a0828b70d3460e13 (patch)
tree      255944724c060d2eef50f0ccf04df7546aca245c /llvm/lib
parent    d391d6f1c32d7316cb7fa8cfa4e039f94133ccbe (diff)
BranchRelaxation: Support expanding unconditional branches
AMDGPU needs to expand unconditional branches in a new block with an indirect branch.

llvm-svn: 283464
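For illustration, a rough sketch of the expansion based on the insertIndirectBranch implementation added to SIInstrInfo.cpp below (the s[8:9] register pair is only an example, since the pass scavenges a free SGPR pair at relaxation time, and post_getpc is a stand-in for the address immediately after s_getpc_b64): when an s_branch destination is outside the signed 16-bit, 4-byte-scaled branch range, the branch is replaced by a new block that materializes the destination address relative to the PC and jumps to it indirectly. For a forward branch the emitted sequence is roughly:

    s_getpc_b64 s[8:9]                       ; s[8:9] = address of the next instruction
    s_add_u32   s8, s8, (dest - post_getpc)  ; add low 32 bits of the PC-relative offset
    s_addc_u32  s9, s9, 0                    ; propagate the carry into the high half
    s_setpc_b64 s[8:9]                       ; indirect branch to dest

Backward branches use s_sub_u32/s_subb_u32 with the magnitude of the offset; the (dest - post_getpc) expression is produced at MC lowering time via the new TF_LONG_BRANCH_FORWARD/TF_LONG_BRANCH_BACKWARD operand flags.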
Diffstat (limited to 'llvm/lib')
-rw-r--r--   llvm/lib/CodeGen/BranchRelaxation.cpp                           89
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp                     15
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h                        3
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp                    35
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h                       6
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp                   1
-rw-r--r--   llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp   15
-rw-r--r--   llvm/lib/Target/AMDGPU/SIInstrInfo.cpp                         187
-rw-r--r--   llvm/lib/Target/AMDGPU/SIInstrInfo.h                            24
-rw-r--r--   llvm/lib/Target/AMDGPU/SOPInstructions.td                        2
10 files changed, 355 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 4f0dfaf874f..1d76831d04a 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
@@ -23,6 +24,7 @@ using namespace llvm;
STATISTIC(NumSplit, "Number of basic blocks split");
STATISTIC(NumConditionalRelaxed, "Number of conditional branches relaxed");
+STATISTIC(NumUnconditionalRelaxed, "Number of unconditional branches relaxed");
#define BRANCH_RELAX_NAME "Branch relaxation pass"
@@ -66,17 +68,22 @@ class BranchRelaxation : public MachineFunctionPass {
};
SmallVector<BasicBlockInfo, 16> BlockInfo;
+ std::unique_ptr<RegScavenger> RS;
MachineFunction *MF;
const TargetInstrInfo *TII;
bool relaxBranchInstructions();
void scanFunction();
+
+ MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &BB);
+
MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI);
void adjustBlockOffsets(MachineBasicBlock &MBB);
bool isBlockInRange(const MachineInstr &MI, const MachineBasicBlock &BB) const;
bool fixupConditionalBranch(MachineInstr &MI);
+ bool fixupUnconditionalBranch(MachineInstr &MI);
uint64_t computeBlockSize(const MachineBasicBlock &MBB) const;
unsigned getInstrOffset(const MachineInstr &MI) const;
void dumpBBs();
@@ -182,6 +189,19 @@ void BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) {
}
}
+/// Insert a new empty basic block after \p BB.
+MachineBasicBlock *BranchRelaxation::createNewBlockAfter(MachineBasicBlock &BB) {
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(BB.getBasicBlock());
+ MF->insert(++BB.getIterator(), NewBB);
+
+ // Insert an entry into BlockInfo to align it properly with the block numbers.
+ BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ return NewBB;
+}
+
/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch. Update data structures and renumber blocks to
/// account for this change and returns the newly created block.
@@ -333,16 +353,55 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
return true;
}
+bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
+ MachineBasicBlock *MBB = MI.getParent();
+
+ unsigned OldBrSize = TII->getInstSizeInBytes(MI);
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+
+ int64_t DestOffset = BlockInfo[DestBB->getNumber()].Offset;
+ int64_t SrcOffset = getInstrOffset(MI);
+
+ assert(!TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - SrcOffset));
+
+ BlockInfo[MBB->getNumber()].Size -= OldBrSize;
+
+ MachineBasicBlock *BranchBB = MBB;
+
+ // If this was an expanded conditional branch, there is already a single
+ // unconditional branch in a block.
+ if (!MBB->empty()) {
+ BranchBB = createNewBlockAfter(*MBB);
+
+ // Add live outs.
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
+ for (const MachineBasicBlock::RegisterMaskPair &LiveIn : Succ->liveins())
+ BranchBB->addLiveIn(LiveIn);
+ }
+
+ BranchBB->addSuccessor(DestBB);
+ MBB->replaceSuccessor(DestBB, BranchBB);
+ }
+
+ DebugLoc DL = MI.getDebugLoc();
+ MI.eraseFromParent();
+
+ // insertIndirectBranch may have inserted a new block.
+ BlockInfo[MBB->getNumber()].Size += TII->insertIndirectBranch(
+ *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get());
+
+ computeBlockSize(*BranchBB);
+ adjustBlockOffsets(*MBB);
+ return true;
+}
+
bool BranchRelaxation::relaxBranchInstructions() {
bool Changed = false;
+
// Relaxing branches involves creating new basic blocks, so re-eval
// end() for termination.
for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) {
MachineBasicBlock &MBB = *I;
- MachineBasicBlock::iterator J = MBB.getFirstTerminator();
- if (J == MBB.end())
- continue;
-
MachineBasicBlock::iterator Next;
for (MachineBasicBlock::iterator J = MBB.getFirstTerminator();
@@ -377,6 +436,21 @@ bool BranchRelaxation::relaxBranchInstructions() {
Next = MBB.getFirstTerminator();
}
}
+
+ if (MI.isUnconditionalBranch()) {
+ // Unconditional branch destination might be unanalyzable, assume these
+ // are OK.
+ if (MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI)) {
+ if (!isBlockInRange(MI, *DestBB)) {
+ fixupUnconditionalBranch(MI);
+ ++NumUnconditionalRelaxed;
+ Changed = true;
+ }
+ }
+
+ // Unconditional branch is the last terminator.
+ break;
+ }
}
}
@@ -388,7 +462,12 @@ bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "***** BranchRelaxation *****\n");
- TII = MF->getSubtarget().getInstrInfo();
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ TII = ST.getInstrInfo();
+
+ const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ RS.reset(new RegScavenger());
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index ef7321402da..ef20047377f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -120,6 +120,21 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
emitStartOfRuntimeMetadata(M);
}
+bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
+ const MachineBasicBlock *MBB) const {
+ if (!AsmPrinter::isBlockOnlyReachableByFallthrough(MBB))
+ return false;
+
+ if (MBB->empty())
+ return true;
+
+ // If this is a block implementing a long branch, an expression relative to
+ // the start of the block is needed.
+ // XXX - Is there a smarter way to check this?
+ return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
+}
+
+
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index f4e62ca0df2..3964d43a738 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -131,6 +131,9 @@ public:
void EmitStartOfAsmFile(Module &M) override;
+ bool isBlockOnlyReachableByFallthrough(
+ const MachineBasicBlock *MBB) const override;
+
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 757a9fc1835..5697a5f09ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -47,6 +47,27 @@ static MCSymbolRefExpr::VariantKind getVariantKind(unsigned MOFlags) {
}
}
+const MCExpr *AMDGPUMCInstLower::getLongBranchBlockExpr(
+ const MachineBasicBlock &SrcBB,
+ const MachineOperand &MO) const {
+ const MCExpr *DestBBSym
+ = MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx);
+ const MCExpr *SrcBBSym = MCSymbolRefExpr::create(SrcBB.getSymbol(), Ctx);
+
+ assert(SrcBB.front().getOpcode() == AMDGPU::S_GETPC_B64 &&
+ ST.getInstrInfo()->get(AMDGPU::S_GETPC_B64).Size == 4);
+
+ // s_getpc_b64 returns the address of the next instruction.
+ const MCConstantExpr *One = MCConstantExpr::create(4, Ctx);
+ SrcBBSym = MCBinaryExpr::createAdd(SrcBBSym, One, Ctx);
+
+ if (MO.getTargetFlags() == AMDGPU::TF_LONG_BRANCH_FORWARD)
+ return MCBinaryExpr::createSub(DestBBSym, SrcBBSym, Ctx);
+
+ assert(MO.getTargetFlags() == AMDGPU::TF_LONG_BRANCH_BACKWARD);
+ return MCBinaryExpr::createSub(SrcBBSym, DestBBSym, Ctx);
+}
+
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());
@@ -71,8 +92,14 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::createReg(AMDGPU::getMCReg(MO.getReg(), ST));
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
- MO.getMBB()->getSymbol(), Ctx));
+ if (MO.getTargetFlags() != 0) {
+ MCOp = MCOperand::createExpr(
+ getLongBranchBlockExpr(*MI->getParent(), MO));
+ } else {
+ MCOp = MCOperand::createExpr(
+ MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
+ }
+
break;
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
@@ -93,6 +120,10 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::createExpr(Expr);
break;
}
+ case MachineOperand::MO_MCSymbol:
+ MCOp = MCOperand::createExpr(
+ MCSymbolRefExpr::create(MO.getMCSymbol(), Ctx));
+ break;
}
OutMI.addOperand(MCOp);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
index 2ea58e496cb..8cedda6a706 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
@@ -14,8 +14,11 @@ namespace llvm {
class AMDGPUSubtarget;
class AsmPrinter;
+class MachineBasicBlock;
class MachineInstr;
+class MachineOperand;
class MCContext;
+class MCExpr;
class MCInst;
class AMDGPUMCInstLower {
@@ -23,6 +26,9 @@ class AMDGPUMCInstLower {
const AMDGPUSubtarget &ST;
const AsmPrinter &AP;
+ const MCExpr *getLongBranchBlockExpr(const MachineBasicBlock &SrcBB,
+ const MachineOperand &MO) const;
+
public:
AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &ST,
const AsmPrinter &AP);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index a6232ad8128..bc436d7ae42 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -602,6 +602,7 @@ void GCNPassConfig::addPreEmitPass() {
addPass(createSIShrinkInstructionsPass());
addPass(&SIInsertSkipsPassID);
addPass(createSIDebuggerInsertNopsPass());
+ addPass(&BranchRelaxationPassID);
}
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 887b073acd5..51c30d35508 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -38,12 +38,15 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- // SCRATCH_RSRC_DWORD[01] is a special global variable that represents
- // the scratch buffer.
- if (Target.getSymA()->getSymbol().getName() == "SCRATCH_RSRC_DWORD0")
- return ELF::R_AMDGPU_ABS32_LO;
- if (Target.getSymA()->getSymbol().getName() == "SCRATCH_RSRC_DWORD1")
- return ELF::R_AMDGPU_ABS32_HI;
+ if (const auto *SymA = Target.getSymA()) {
+ // SCRATCH_RSRC_DWORD[01] is a special global variable that represents
+ // the scratch buffer.
+ if (SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD0")
+ return ELF::R_AMDGPU_ABS32_LO;
+
+ if (SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD1")
+ return ELF::R_AMDGPU_ABS32_HI;
+ }
switch (Target.getAccessVariant()) {
default:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index da725dae47c..ce41d82bbbf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -28,6 +28,13 @@
using namespace llvm;
+// Must be at least 4 to be able to branch over minimum unconditional branch
+// code. This is only for making it possible to write reasonably small tests for
+// long branches.
+static cl::opt<unsigned>
+BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
+ cl::desc("Restrict range of branch instructions (DEBUG)"));
+
SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
: AMDGPUInstrInfo(ST), RI(), ST(ST) {}
@@ -1045,6 +1052,128 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
}
+bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
+ int64_t BrOffset) const {
+ // BranchRelaxation should never have to check s_setpc_b64 because its dest
+ // block is unanalyzable.
+ assert(BranchOp != AMDGPU::S_SETPC_B64);
+
+ // Convert to dwords.
+ BrOffset /= 4;
+
+ // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is
+ // from the next instruction.
+ BrOffset -= 1;
+
+ return isIntN(BranchOffsetBits, BrOffset);
+}
+
+MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
+ const MachineInstr &MI) const {
+ if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
+ // This would be a difficult analysis to perform, but can always be legal so
+ // there's no need to analyze it.
+ return nullptr;
+ }
+
+ return MI.getOperand(0).getMBB();
+}
+
+unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &DestBB,
+ const DebugLoc &DL,
+ int64_t BrOffset,
+ RegScavenger *RS) const {
+ assert(RS && "RegScavenger required for long branching");
+ assert(MBB.empty() &&
+ "new block should be inserted for expanding unconditional branch");
+ assert(MBB.pred_size() == 1);
+
+ MachineFunction *MF = MBB.getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // FIXME: Virtual register workaround for RegScavenger not working with empty
+ // blocks.
+ unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+ auto I = MBB.end();
+
+ // We need to compute the offset relative to the instruction immediately after
+ // s_getpc_b64. Insert pc arithmetic code before last terminator.
+ MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);
+
+ // TODO: Handle > 32-bit block address.
+ if (BrOffset >= 0) {
+ BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
+ .addReg(PCReg, RegState::Define, AMDGPU::sub0)
+ .addReg(PCReg, 0, AMDGPU::sub0)
+ .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD);
+ BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
+ .addReg(PCReg, RegState::Define, AMDGPU::sub1)
+ .addReg(PCReg, 0, AMDGPU::sub1)
+ .addImm(0);
+ } else {
+ // Backwards branch.
+ BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
+ .addReg(PCReg, RegState::Define, AMDGPU::sub0)
+ .addReg(PCReg, 0, AMDGPU::sub0)
+ .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_BACKWARD);
+ BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
+ .addReg(PCReg, RegState::Define, AMDGPU::sub1)
+ .addReg(PCReg, 0, AMDGPU::sub1)
+ .addImm(0);
+ }
+
+ // Insert the indirect branch after the other terminator.
+ BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
+ .addReg(PCReg);
+
+ // FIXME: If spilling is necessary, this will fail because this scavenger has
+ // no emergency stack slots. It is non-trivial to spill in this situation,
+ // because the restore code needs to be specially placed after the
+ // jump. BranchRelaxation then needs to be made aware of the newly inserted
+ // block.
+ //
+ // If a spill is needed for the pc register pair, we need to insert a spill
+ // restore block right before the destination block, and insert a short branch
+ // into the old destination block's fallthrough predecessor.
+ // e.g.:
+ //
+ // s_cbranch_scc0 skip_long_branch:
+ //
+ // long_branch_bb:
+ // spill s[8:9]
+ // s_getpc_b64 s[8:9]
+ // s_add_u32 s8, s8, restore_bb
+ // s_addc_u32 s9, s9, 0
+ // s_setpc_b64 s[8:9]
+ //
+ // skip_long_branch:
+ // foo;
+ //
+ // .....
+ //
+ // dest_bb_fallthrough_predecessor:
+ // bar;
+ // s_branch dest_bb
+ //
+ // restore_bb:
+ // restore s[8:9]
+ // fallthrough dest_bb
+ ///
+ // dest_bb:
+ // buzz;
+
+ RS->enterBasicBlockEnd(MBB);
+ unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64RegClass,
+ MachineBasicBlock::iterator(GetPC), 0);
+ MRI.replaceRegWith(PCReg, Scav);
+ MRI.clearVirtRegs();
+ RS->setRegUsed(Scav);
+
+ return 4 + 8 + 4 + 4;
+}
+
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
switch (Cond) {
case SIInstrInfo::SCC_TRUE:
@@ -1083,15 +1212,12 @@ SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
}
}
-bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- MachineBasicBlock::iterator I = MBB.getFirstTerminator();
-
- if (I == MBB.end())
- return false;
-
+bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
if (I->getOpcode() == AMDGPU::S_BRANCH) {
// Unconditional Branch
TBB = I->getOperand(0).getMBB();
@@ -1122,6 +1248,44 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
return true;
}
+bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ MachineBasicBlock::iterator I = MBB.getFirstTerminator();
+ if (I == MBB.end())
+ return false;
+
+ if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
+ return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
+
+ ++I;
+
+ // TODO: Should be able to treat as fallthrough?
+ if (I == MBB.end())
+ return true;
+
+ if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
+ return true;
+
+ MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();
+
+ // Specifically handle the case where the conditional branch is to the same
+ // destination as the mask branch. e.g.
+ //
+ // si_mask_branch BB8
+ // s_cbranch_execz BB8
+ // s_cbranch BB9
+ //
+ // This is required to understand divergent loops which may need the branches
+ // to be relaxed.
+ if (TBB != MaskBrDest || Cond.empty())
+ return true;
+
+ auto Pred = Cond[0].getImm();
+ return (Pred != EXECZ && Pred != EXECNZ);
+}
+
unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
MachineBasicBlock::iterator I = MBB.getFirstTerminator();
@@ -1130,6 +1294,11 @@ unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
unsigned RemovedSize = 0;
while (I != MBB.end()) {
MachineBasicBlock::iterator Next = std::next(I);
+ if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
+ I = Next;
+ continue;
+ }
+
RemovedSize += getInstSizeInBytes(*I);
I->eraseFromParent();
++Count;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 11fe8c90396..b08801cd2c3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -158,6 +158,24 @@ public:
bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
+ bool isBranchOffsetInRange(unsigned BranchOpc,
+ int64_t BrOffset) const override;
+
+ MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
+
+ unsigned insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ const DebugLoc &DL,
+ int64_t BrOffset,
+ RegScavenger *RS = nullptr) const override;
+
+ bool analyzeBranchImpl(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -618,6 +636,12 @@ namespace AMDGPU {
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
+
+ // For MachineOperands.
+ enum TargetFlags {
+ TF_LONG_BRANCH_FORWARD = 1 << 0,
+ TF_LONG_BRANCH_BACKWARD = 1 << 1
+ };
} // End namespace AMDGPU
namespace SI {
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index d31002b999a..404ee4260aa 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -25,6 +25,7 @@ class SOP1_Pseudo <string opName, dag outs, dag ins,
let SALU = 1;
let SOP1 = 1;
let SchedRW = [WriteSALU];
+ let Size = 4;
let UseNamedOperandTable = 1;
string Mnemonic = opName;
@@ -41,6 +42,7 @@ class SOP1_Real<bits<8> op, SOP1_Pseudo ps> :
let isPseudo = 0;
let isCodeGenOnly = 0;
+ let Size = 4;
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;