summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/MachineCSE.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen/MachineCSE.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineCSE.cpp127
1 files changed, 118 insertions, 9 deletions
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index 519cb4703ca..d05e7f542d0 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -49,6 +50,8 @@ using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
+STATISTIC(NumPREs, "Number of partial redundant expression"
+ " transformed to fully redundant");
STATISTIC(NumPhysCSEs,
"Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
@@ -84,6 +87,7 @@ namespace {
void releaseMemory() override {
ScopeMap.clear();
+ PREMap.clear();
Exps.clear();
}
@@ -98,6 +102,8 @@ namespace {
unsigned LookAheadLimit = 0;
DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
+ DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
+ PREMap;
ScopedHTType VNT;
SmallVector<MachineInstr *, 64> Exps;
unsigned CurrVN = 0;
@@ -116,13 +122,17 @@ namespace {
PhysDefVector &PhysDefs, bool &NonLocal) const;
bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
- MachineInstr *CSMI, MachineInstr *MI);
+ MachineBasicBlock *CSBB, MachineInstr *MI);
void EnterScope(MachineBasicBlock *MBB);
void ExitScope(MachineBasicBlock *MBB);
- bool ProcessBlock(MachineBasicBlock *MBB);
+ bool ProcessBlockCSE(MachineBasicBlock *MBB);
void ExitScopeIfDone(MachineDomTreeNode *Node,
DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
bool PerformCSE(MachineDomTreeNode *Node);
+
+ bool isPRECandidate(MachineInstr *MI);
+ bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
+ bool PerformSimplePRE(MachineDominatorTree *DT);
};
} // end anonymous namespace
@@ -405,9 +415,10 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
}
/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
-/// common expression that defines Reg.
+/// common expression that defines Reg. CSBB is basic block where CSReg is
+/// defined.
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
- MachineInstr *CSMI, MachineInstr *MI) {
+ MachineBasicBlock *CSBB, MachineInstr *MI) {
// FIXME: Heuristics that works around the lack the live range splitting.
// If CSReg is used at all uses of Reg, CSE should not increase register
@@ -433,7 +444,6 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
if (TII->isAsCheapAsAMove(*MI)) {
- MachineBasicBlock *CSBB = CSMI->getParent();
MachineBasicBlock *BB = MI->getParent();
if (CSBB != BB && !CSBB->isSuccessor(BB))
return false;
@@ -488,7 +498,7 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
ScopeMap.erase(SI);
}
-bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
+bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
bool Changed = false;
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
@@ -598,7 +608,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
TargetRegisterInfo::isVirtualRegister(NewReg) &&
"Do not CSE physical register defs!");
- if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+ if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
DoCSE = false;
break;
@@ -738,7 +748,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
for (MachineDomTreeNode *Node : Scopes) {
MachineBasicBlock *MBB = Node->getBlock();
EnterScope(MBB);
- Changed |= ProcessBlock(MBB);
+ Changed |= ProcessBlockCSE(MBB);
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
ExitScopeIfDone(Node, OpenChildren);
}
@@ -746,6 +756,102 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
return Changed;
}
+// We use stronger checks for PRE candidate rather than for CSE ones to embrace
+// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
+// to exclude instrs created by PRE that won't be CSEed later.
+bool MachineCSE::isPRECandidate(MachineInstr *MI) {
+ if (!isCSECandidate(MI) ||
+ MI->isNotDuplicable() ||
+ MI->mayLoad() ||
+ MI->isAsCheapAsAMove() ||
+ MI->getNumDefs() != 1 ||
+ MI->getNumExplicitDefs() != 1)
+ return false;
+
+ for (auto def : MI->defs())
+ if (!TRI->isVirtualRegister(def.getReg()))
+ return false;
+
+ for (auto use : MI->uses())
+ if (use.isReg() && !TRI->isVirtualRegister(use.getReg()))
+ return false;
+
+ return true;
+}
+
+bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
+ MachineBasicBlock *MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (!isPRECandidate(MI))
+ continue;
+
+ if (!PREMap.count(MI)) {
+ PREMap[MI] = MBB;
+ continue;
+ }
+
+ auto MBB1 = PREMap[MI];
+ assert(
+ !DT->properlyDominates(MBB, MBB1) &&
+ "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
+ auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
+
+ // Two instrs are partial redundant if their basic blocks are reachable
+ // from one to another but one doesn't dominate another.
+ if (CMBB != MBB1) {
+ auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
+ if (BB != nullptr && BB1 != nullptr &&
+ (isPotentiallyReachable(BB1, BB) ||
+ isPotentiallyReachable(BB, BB1))) {
+
+ assert(MI->getOperand(0).isDef() &&
+ "First operand of instr with one explicit def must be this def");
+ unsigned VReg = MI->getOperand(0).getReg();
+ unsigned NewReg = MRI->cloneVirtualRegister(VReg);
+ if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
+ continue;
+ MachineInstr &NewMI =
+ TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
+ NewMI.getOperand(0).setReg(NewReg);
+
+ PREMap[MI] = CMBB;
+ ++NumPREs;
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+// This simple PRE (partial redundancy elimination) pass doesn't actually
+// eliminate partial redundancy but transforms it to full redundancy,
+// anticipating that the next CSE step will eliminate this created redundancy.
+// If CSE doesn't eliminate this, than created instruction will remain dead
+// and eliminated later by Remove Dead Machine Instructions pass.
+bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
+ SmallVector<MachineDomTreeNode *, 32> BBs;
+
+ PREMap.clear();
+ bool Changed = false;
+ BBs.push_back(DT->getRootNode());
+ do {
+ auto Node = BBs.pop_back_val();
+ const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
+ for (MachineDomTreeNode *Child : Children)
+ BBs.push_back(Child);
+
+ MachineBasicBlock *MBB = Node->getBlock();
+ Changed |= ProcessBlockPRE(DT, MBB);
+
+ } while (!BBs.empty());
+
+ return Changed;
+}
+
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -756,5 +862,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<MachineDominatorTree>();
LookAheadLimit = TII->getMachineCSELookAheadLimit();
- return PerformCSE(DT->getRootNode());
+ bool ChangedPRE, ChangedCSE;
+ ChangedPRE = PerformSimplePRE(DT);
+ ChangedCSE = PerformCSE(DT->getRootNode());
+ return ChangedPRE || ChangedCSE;
}
OpenPOWER on IntegriCloud