Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp                |   7
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td              |   1
-rw-r--r--  llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp         | 127
-rw-r--r--  llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp      |   6
-rw-r--r--  llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp |   4
5 files changed, 128 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1196fe1512c..45b0a5b2a8d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1214,6 +1214,12 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(AMDGPU::S_XOR_B64));
break;
+ case AMDGPU::S_OR_B64_term:
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(get(AMDGPU::S_OR_B64));
+ break;
+
case AMDGPU::S_ANDN2_B64_term:
// This is only a terminator to get the correct spill code placement during
// register allocation.
@@ -1698,6 +1704,7 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
case AMDGPU::SI_MASK_BRANCH:
case AMDGPU::S_MOV_B64_term:
case AMDGPU::S_XOR_B64_term:
+ case AMDGPU::S_OR_B64_term:
case AMDGPU::S_ANDN2_B64_term:
break;
case AMDGPU::SI_IF:
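Both hunks follow the existing *_term convention: the pseudo is a terminator only so the register allocator cannot place spill code after an exec-mask update, and analyzeBranch has to accept it so such blocks stay analyzable; after allocation it collapses back to the plain opcode. A sketch of that lowering step as a hypothetical standalone helper (the name and factoring are illustrative, assuming the pass's usual includes):

  static bool lowerTermPseudo(const SIInstrInfo &TII, MachineInstr &MI) {
    switch (MI.getOpcode()) {
    case AMDGPU::S_OR_B64_term:
      // Same operands, real encoding; only the terminator flag goes away.
      MI.setDesc(TII.get(AMDGPU::S_OR_B64));
      return true;
    default:
      return false;
    }
  }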
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f6978034d25..383d3b6af2f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -189,6 +189,7 @@ class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
}
def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
+def S_OR_B64_term : WrapTerminatorInst<S_OR_B64>;
def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
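WrapTerminatorInst stamps out a pseudo with the same operand list as the wrapped SOP instruction, differing only in its terminator flag, so a pass builds it exactly like the base opcode. A hedged sketch as a hypothetical helper (SavedExec is a placeholder for the mask register saved by SI_IF, not a name from this patch):

  static MachineInstr *buildEndCfTerm(MachineFunction &MF, const DebugLoc &DL,
                                      const SIInstrInfo &TII,
                                      unsigned SavedExec) {
    return BuildMI(MF, DL, TII.get(AMDGPU::S_OR_B64_term), AMDGPU::EXEC)
               .addReg(AMDGPU::EXEC)
               .addReg(SavedExec); // placeholder: lanes to re-enable
  }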
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index a399f7715a2..02b47e869bf 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -55,6 +55,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -79,8 +80,11 @@ class SILowerControlFlow : public MachineFunctionPass {
private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
- LiveIntervals *LIS = nullptr;
MachineRegisterInfo *MRI = nullptr;
+ LiveIntervals *LIS = nullptr;
+ MachineDominatorTree *DT = nullptr;
+ MachineLoopInfo *MLI = nullptr;
+
void emitIf(MachineInstr &MI);
void emitElse(MachineInstr &MI);
@@ -111,7 +115,7 @@ public:
AU.addPreservedID(LiveVariablesID);
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
- AU.setPreservesCFG();
+
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -388,23 +392,99 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
MI.eraseFromParent();
}
+// Insert \p NewMI (which modifies exec) in place of \p MI in \p MBB, splitting
+// \p MBB as necessary to keep the exec modification in its own block.
+static MachineBasicBlock *insertInstWithExecFallthrough(MachineBasicBlock &MBB,
+ MachineInstr &MI,
+ MachineInstr *NewMI,
+ MachineDominatorTree *DT,
+ LiveIntervals *LIS,
+ MachineLoopInfo *MLI) {
+ assert(NewMI->isTerminator());
+
+ MachineBasicBlock::iterator InsPt = MI.getIterator();
+ if (std::next(MI.getIterator()) == MBB.end()) {
+ // Don't bother with a new block.
+ MBB.insert(InsPt, NewMI);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
+ MI.eraseFromParent();
+ return &MBB;
+ }
+
+ MachineFunction *MF = MBB.getParent();
+ MachineBasicBlock *SplitMBB
+ = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ MF->insert(++MachineFunction::iterator(MBB), SplitMBB);
+
+ // FIXME: This is working around a MachineDominatorTree API defect.
+ //
+ // If a previous pass split a critical edge, it may not have been applied to
+ // the DomTree yet. applySplitCriticalEdges is lazily applied, and inspects
+ // the CFG of the given block. Make sure to call a dominator tree method that
+ // will flush this cache before touching the successors of the block.
+ MachineDomTreeNode *NodeMBB = nullptr;
+ if (DT)
+ NodeMBB = DT->getNode(&MBB);
+
+  // Move everything to the new block; the end_cf pseudo is erased below.
+ SplitMBB->splice(SplitMBB->begin(), &MBB, MBB.begin(), MBB.end());
+
+ SplitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ MBB.addSuccessor(SplitMBB, BranchProbability::getOne());
+
+ MBB.insert(MBB.end(), NewMI);
+
+ if (DT) {
+ std::vector<MachineDomTreeNode *> Children = NodeMBB->getChildren();
+ DT->addNewBlock(SplitMBB, &MBB);
+
+ // Reparent all of the children to the new block body.
+ auto *SplitNode = DT->getNode(SplitMBB);
+ for (auto *Child : Children)
+ DT->changeImmediateDominator(Child, SplitNode);
+ }
+
+ if (MLI) {
+ if (MachineLoop *Loop = MLI->getLoopFor(&MBB))
+ Loop->addBasicBlockToLoop(SplitMBB, MLI->getBase());
+ }
+
+ if (LIS) {
+ LIS->insertMBBInMaps(SplitMBB);
+ LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
+ }
+
+ // All live-ins are forwarded.
+ for (auto &LiveIn : MBB.liveins())
+ SplitMBB->addLiveIn(LiveIn);
+
+ MI.eraseFromParent();
+ return SplitMBB;
+}
+
void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator InsPt = MBB.begin();
- MachineInstr *NewMI =
- BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .add(MI.getOperand(0));
+ // First, move the instruction. It's unnecessarily difficult to update
+ // LiveIntervals when there's a change in control flow, so move the
+ // instruction before changing the blocks.
+ MBB.splice(InsPt, &MBB, MI.getIterator());
if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
+ LIS->handleMove(MI);
- MI.eraseFromParent();
+ MachineFunction *MF = MBB.getParent();
- if (LIS)
- LIS->handleMove(*NewMI);
+ // Create instruction without inserting it yet.
+ MachineInstr *NewMI
+ = BuildMI(*MF, DL, TII->get(AMDGPU::S_OR_B64_term), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .add(MI.getOperand(0));
+ insertInstWithExecFallthrough(MBB, MI, NewMI, DT, LIS, MLI);
}
// Returns replace operands for a logical operation, either single result
@@ -470,17 +550,20 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
// This doesn't actually need LiveIntervals, but we can preserve them.
LIS = getAnalysisIfAvailable<LiveIntervals>();
+ DT = getAnalysisIfAvailable<MachineDominatorTree>();
+ MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+
MRI = &MF.getRegInfo();
MachineFunction::iterator NextBB;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; BI = NextBB) {
NextBB = std::next(BI);
- MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock *MBB = &*BI;
MachineBasicBlock::iterator I, Next, Last;
- for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) {
+ for (I = MBB->begin(), Last = MBB->end(); I != MBB->end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
@@ -501,10 +584,24 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
emitLoop(MI);
break;
- case AMDGPU::SI_END_CF:
+ case AMDGPU::SI_END_CF: {
+ MachineInstr *NextMI = nullptr;
+
+ if (Next != MBB->end())
+ NextMI = &*Next;
+
emitEndCf(MI);
- break;
+ if (NextMI) {
+ MBB = NextMI->getParent();
+ Next = NextMI->getIterator();
+ Last = MBB->end();
+ }
+
+ NextBB = std::next(MBB->getIterator());
+ BE = MF.end();
+ break;
+ }
case AMDGPU::S_AND_B64:
case AMDGPU::S_OR_B64:
// Cleanup bit manipulations on exec mask
@@ -518,7 +615,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
}
// Replay newly inserted code to combine masks
- Next = (Last == MBB.end()) ? MBB.begin() : Last;
+ Next = (Last == MBB->end()) ? MBB->begin() : Last;
}
}
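The central change: SI_END_CF now lowers to S_OR_B64_term, and insertInstWithExecFallthrough splits the block so the exec restore is the last instruction, with everything after it falling through to a new successor block. The mask arithmetic itself is a plain OR of the saved lanes back into exec; a simplified standalone model (plain C++, not LLVM code; the lane masks are made up):

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t Exec  = 0xFFFF;       // 16 lanes active on entry
    uint64_t Cond  = 0x00F3;       // lanes whose branch condition holds
    uint64_t Saved = Exec & ~Cond; // SI_IF: park the untaken lanes
    Exec &= Cond;                  // the guarded code runs on fewer lanes
    // ... conditional code executes here ...
    Exec |= Saved;                 // SI_END_CF: s_or_b64 exec, exec, saved
    printf("exec restored to 0x%04llx\n", (unsigned long long)Exec);
    return 0;
  }

Keeping the restore at a block boundary means every instruction that observes the widened mask starts a fresh block, which is what lets the OR remain a terminator through register allocation.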
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index c73066fe8d6..4ae64c5b990 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -149,6 +149,12 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
MI.setDesc(TII.get(AMDGPU::S_XOR_B64));
return true;
}
+ case AMDGPU::S_OR_B64_term: {
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(TII.get(AMDGPU::S_OR_B64));
+ return true;
+ }
case AMDGPU::S_ANDN2_B64_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index df06ba5a692..9db1e5af015 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -83,7 +83,7 @@ FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
}
static bool isEndCF(const MachineInstr& MI, const SIRegisterInfo* TRI) {
- return MI.getOpcode() == AMDGPU::S_OR_B64 &&
+ return MI.getOpcode() == AMDGPU::S_OR_B64_term &&
MI.modifiesRegister(AMDGPU::EXEC, TRI);
}
@@ -362,7 +362,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
// Try to collapse adjacent endifs.
auto E = MBB.end();
- auto Lead = skipDebugInstructionsForward(MBB.begin(), E);
+ auto Lead = MBB.getFirstTerminator();
if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
continue;
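Since the restore is now emitted as the S_OR_B64_term terminator, this pass matches it at getFirstTerminator() instead of scanning from the first (possibly debug) instruction of the block. A sketch of the recognition as a hypothetical helper built on the isEndCF above:

  static bool isEndCfBlock(const MachineBasicBlock &MBB,
                           const SIRegisterInfo *TRI) {
    auto Lead = MBB.getFirstTerminator();
    return MBB.succ_size() == 1 && Lead != MBB.end() && isEndCF(*Lead, TRI);
  }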