summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td1
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp142
-rw-r--r--llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp14
5 files changed, 28 insertions, 142 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f4f858fa483..ea877272826 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1397,12 +1397,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(AMDGPU::S_OR_B32));
break;
- case AMDGPU::S_OR_B64_term:
- // This is only a terminator to get the correct spill code placement during
- // register allocation.
- MI.setDesc(get(AMDGPU::S_OR_B64));
- break;
-
case AMDGPU::S_ANDN2_B64_term:
// This is only a terminator to get the correct spill code placement during
// register allocation.
@@ -1895,7 +1889,6 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
case AMDGPU::SI_MASK_BRANCH:
case AMDGPU::S_MOV_B64_term:
case AMDGPU::S_XOR_B64_term:
- case AMDGPU::S_OR_B64_term:
case AMDGPU::S_ANDN2_B64_term:
case AMDGPU::S_MOV_B32_term:
case AMDGPU::S_XOR_B32_term:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f67c0a20861..934b50b87de 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -193,7 +193,6 @@ class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
let WaveSizePredicate = isWave64 in {
def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
-def S_OR_B64_term : WrapTerminatorInst<S_OR_B64>;
def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index c95c12c8f49..1a51b7ebffa 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -55,7 +55,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -80,16 +79,12 @@ class SILowerControlFlow : public MachineFunctionPass {
private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
- MachineRegisterInfo *MRI = nullptr;
LiveIntervals *LIS = nullptr;
- MachineDominatorTree *DT = nullptr;
- MachineLoopInfo *MLI = nullptr;
-
+ MachineRegisterInfo *MRI = nullptr;
const TargetRegisterClass *BoolRC = nullptr;
unsigned AndOpc;
unsigned OrOpc;
- unsigned OrTermOpc;
unsigned XorOpc;
unsigned MovTermOpc;
unsigned Andn2TermOpc;
@@ -101,7 +96,7 @@ private:
void emitElse(MachineInstr &MI);
void emitIfBreak(MachineInstr &MI);
void emitLoop(MachineInstr &MI);
- MachineBasicBlock *emitEndCf(MachineInstr &MI);
+ void emitEndCf(MachineInstr &MI);
void findMaskOperands(MachineInstr &MI, unsigned OpNo,
SmallVectorImpl<MachineOperand> &Src) const;
@@ -126,7 +121,7 @@ public:
AU.addPreservedID(LiveVariablesID);
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
-
+ AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -254,7 +249,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
LIS->InsertMachineInstrInMaps(*SetExec);
LIS->InsertMachineInstrInMaps(*NewBr);
- LIS->removeAllRegUnitsForPhysReg(Exec);
+ LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
MI.eraseFromParent();
// FIXME: Is there a better way of adjusting the liveness? It shouldn't be
@@ -338,7 +333,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
LIS->createAndComputeVirtRegInterval(SaveReg);
// Let this be recomputed.
- LIS->removeAllRegUnitsForPhysReg(Exec);
+ LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
}
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
@@ -403,99 +398,23 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
MI.eraseFromParent();
}
-// Insert \p Inst (which modifies exec) at \p InsPt in \p MBB, such that \p MBB
-// is split as necessary to keep the exec modification in its own block.
-static MachineBasicBlock *insertInstWithExecFallthrough(MachineBasicBlock &MBB,
- MachineInstr &MI,
- MachineInstr *NewMI,
- MachineDominatorTree *DT,
- LiveIntervals *LIS,
- MachineLoopInfo *MLI) {
- assert(NewMI->isTerminator());
-
- MachineBasicBlock::iterator InsPt = MI.getIterator();
- if (std::next(MI.getIterator()) == MBB.end()) {
- // Don't bother with a new block.
- MBB.insert(InsPt, NewMI);
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
- MI.eraseFromParent();
- return &MBB;
- }
-
- MachineFunction *MF = MBB.getParent();
- MachineBasicBlock *SplitMBB
- = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-
- MF->insert(++MachineFunction::iterator(MBB), SplitMBB);
-
- // FIXME: This is working around a MachineDominatorTree API defect.
- //
- // If a previous pass split a critical edge, it may not have been applied to
- // the DomTree yet. applySplitCriticalEdges is lazily applied, and inspects
- // the CFG of the given block. Make sure to call a dominator tree method that
- // will flush this cache before touching the successors of the block.
- MachineDomTreeNode *NodeMBB = nullptr;
- if (DT)
- NodeMBB = DT->getNode(&MBB);
-
- // Move everything to the new block, except the end_cf pseudo.
- SplitMBB->splice(SplitMBB->begin(), &MBB, MBB.begin(), MBB.end());
-
- SplitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
- MBB.addSuccessor(SplitMBB, BranchProbability::getOne());
-
- MBB.insert(MBB.end(), NewMI);
-
- if (DT) {
- std::vector<MachineDomTreeNode *> Children = NodeMBB->getChildren();
- DT->addNewBlock(SplitMBB, &MBB);
-
- // Reparent all of the children to the new block body.
- auto *SplitNode = DT->getNode(SplitMBB);
- for (auto *Child : Children)
- DT->changeImmediateDominator(Child, SplitNode);
- }
-
- if (MLI) {
- if (MachineLoop *Loop = MLI->getLoopFor(&MBB))
- Loop->addBasicBlockToLoop(SplitMBB, MLI->getBase());
- }
-
- if (LIS) {
- LIS->insertMBBInMaps(SplitMBB);
- LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
- }
-
- // All live-ins are forwarded.
- for (auto &LiveIn : MBB.liveins())
- SplitMBB->addLiveIn(LiveIn);
-
- MI.eraseFromParent();
- return SplitMBB;
-}
-
-MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
+void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator InsPt = MBB.begin();
+ MachineInstr *NewMI =
+ BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
+ .addReg(Exec)
+ .add(MI.getOperand(0));
- // First, move the instruction. It's unnecessarily difficult to update
- // LiveIntervals when there's a change in control flow, so move the
- // instruction before changing the blocks.
- MBB.splice(InsPt, &MBB, MI.getIterator());
if (LIS)
- LIS->handleMove(MI);
+ LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
- MachineFunction *MF = MBB.getParent();
+ MI.eraseFromParent();
- // Create instruction without inserting it yet.
- MachineInstr *NewMI
- = BuildMI(*MF, DL, TII->get(OrTermOpc), Exec)
- .addReg(Exec)
- .add(MI.getOperand(0));
- return insertInstWithExecFallthrough(MBB, MI, NewMI, DT, LIS, MLI);
+ if (LIS)
+ LIS->handleMove(*NewMI);
}
// Returns replace operands for a logical operation, either single result
@@ -517,7 +436,7 @@ void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
// A copy with implicitly defined exec inserted earlier is an exclusion, it
// does not really modify exec.
for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
- if (I->modifiesRegister(Exec, TRI) &&
+ if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
!(I->isCopy() && I->getOperand(0).getReg() != Exec))
return;
@@ -560,16 +479,12 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
// This doesn't actually need LiveIntervals, but we can preserve them.
LIS = getAnalysisIfAvailable<LiveIntervals>();
- DT = getAnalysisIfAvailable<MachineDominatorTree>();
- MLI = getAnalysisIfAvailable<MachineLoopInfo>();
-
MRI = &MF.getRegInfo();
BoolRC = TRI->getBoolRC();
if (ST.isWave32()) {
AndOpc = AMDGPU::S_AND_B32;
OrOpc = AMDGPU::S_OR_B32;
- OrTermOpc = AMDGPU::S_OR_B32_term;
XorOpc = AMDGPU::S_XOR_B32;
MovTermOpc = AMDGPU::S_MOV_B32_term;
Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
@@ -579,7 +494,6 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
} else {
AndOpc = AMDGPU::S_AND_B64;
OrOpc = AMDGPU::S_OR_B64;
- OrTermOpc = AMDGPU::S_OR_B64_term;
XorOpc = AMDGPU::S_XOR_B64;
MovTermOpc = AMDGPU::S_MOV_B64_term;
Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
@@ -592,11 +506,11 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; BI = NextBB) {
NextBB = std::next(BI);
- MachineBasicBlock *MBB = &*BI;
+ MachineBasicBlock &MBB = *BI;
MachineBasicBlock::iterator I, Next, Last;
- for (I = MBB->begin(), Last = MBB->end(); I != MBB->end(); I = Next) {
+ for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
@@ -617,24 +531,10 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
emitLoop(MI);
break;
- case AMDGPU::SI_END_CF: {
- MachineInstr *NextMI = nullptr;
-
- if (Next != MBB->end())
- NextMI = &*Next;
-
- MBB = emitEndCf(MI);
-
- if (NextMI) {
- MBB = NextMI->getParent();
- Next = NextMI->getIterator();
- Last = MBB->end();
- }
-
- NextBB = std::next(MBB->getIterator());
- BE = MF.end();
+ case AMDGPU::SI_END_CF:
+ emitEndCf(MI);
break;
- }
+
case AMDGPU::S_AND_B64:
case AMDGPU::S_OR_B64:
case AMDGPU::S_AND_B32:
@@ -650,7 +550,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
}
// Replay newly inserted code to combine masks
- Next = (Last == MBB->end()) ? MBB->begin() : Last;
+ Next = (Last == MBB.end()) ? MBB.begin() : Last;
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index b8e076f5efd..cc9b46a7558 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -202,12 +202,6 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
MI.setDesc(TII.get(AMDGPU::S_OR_B32));
return true;
}
- case AMDGPU::S_OR_B64_term: {
- // This is only a terminator to get the correct spill code placement during
- // register allocation.
- MI.setDesc(TII.get(AMDGPU::S_OR_B64));
- return true;
- }
case AMDGPU::S_ANDN2_B64_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 0eb850fe176..681c3b35f75 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -82,14 +82,14 @@ FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
return new SIOptimizeExecMaskingPreRA();
}
-static bool isEndCF(const MachineInstr &MI, const GCNSubtarget &ST,
- const SIRegisterInfo *TRI) {
+static bool isEndCF(const MachineInstr &MI, const SIRegisterInfo *TRI,
+ const GCNSubtarget &ST) {
if (ST.isWave32()) {
- return MI.getOpcode() == AMDGPU::S_OR_B32_term &&
+ return MI.getOpcode() == AMDGPU::S_OR_B32 &&
MI.modifiesRegister(AMDGPU::EXEC_LO, TRI);
}
- return MI.getOpcode() == AMDGPU::S_OR_B64_term &&
+ return MI.getOpcode() == AMDGPU::S_OR_B64 &&
MI.modifiesRegister(AMDGPU::EXEC, TRI);
}
@@ -379,13 +379,13 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
// Try to collapse adjacent endifs.
auto E = MBB.end();
- auto Lead = MBB.getFirstTerminator();
- if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, ST, TRI))
+ auto Lead = skipDebugInstructionsForward(MBB.begin(), E);
+ if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI, ST))
continue;
MachineBasicBlock *TmpMBB = &MBB;
auto NextLead = skipIgnoreExecInstsTrivialSucc(TmpMBB, std::next(Lead));
- if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, ST, TRI) ||
+ if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, TRI, ST) ||
!getOrExecSource(*NextLead, *TII, MRI, ST))
continue;
OpenPOWER on IntegriCloud