summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/lib/Transform/CodePreparation.cpp101
-rw-r--r--polly/test/CodePreparation/multiple_loops_trivial_phis.ll56
-rw-r--r--polly/test/CodePreparation/single_loop_trivial_phi.ll50
3 files changed, 189 insertions, 18 deletions
diff --git a/polly/lib/Transform/CodePreparation.cpp b/polly/lib/Transform/CodePreparation.cpp
index b0032498a8e..0989fe5f75b 100644
--- a/polly/lib/Transform/CodePreparation.cpp
+++ b/polly/lib/Transform/CodePreparation.cpp
@@ -49,6 +49,28 @@ using namespace llvm;
using namespace polly;
namespace {
+
+// Helper function which (for a given PHI node):
+//
+// 1) Remembers all incoming values and the associated basic blocks
+// 2) Demotes the phi node to the stack
+// 3) Remembers the correlation between the PHI node and the new alloca
+//
+// When we combine the information from 1) and 3) we know the values stored
+// in this alloca at the end of the predecessor basic blocks of the PHI.
+static void DemotePHI(
+ PHINode *PN, DenseMap<PHINode *, AllocaInst *> &PNallocMap,
+ DenseMap<std::pair<Value *, BasicBlock *>, PHINode *> &ValueLocToPhiMap) {
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ auto *InVal = PN->getIncomingValue(i);
+ auto *InBB = PN->getIncomingBlock(i);
+ ValueLocToPhiMap[std::make_pair(InVal, InBB)] = PN;
+ }
+
+ PNallocMap[PN] = DemotePHIToStack(PN);
+}
+
/// @brief Prepare the IR for the scop detection.
///
class CodePreparation : public FunctionPass {
@@ -84,12 +106,23 @@ void CodePreparation::clear() {}
CodePreparation::~CodePreparation() { clear(); }
bool CodePreparation::eliminatePHINodes(Function &F) {
- // The PHINodes that will be deleted.
+ // The PHINodes that will be demoted.
+ std::vector<PHINode *> PNtoDemote;
+ // The PHINodes that will be deleted (stack slot sharing).
std::vector<PHINode *> PNtoDelete;
// The PHINodes that will be preserved.
- std::vector<PHINode *> PreservedPNs;
-
- // Scan the PHINodes in this function.
+ std::vector<PHINode *> PNtoPreserve;
+ // Map to remember values stored in PHINodes at the end of basic blocks.
+ DenseMap<std::pair<Value *, BasicBlock *>, PHINode *> ValueLocToPhiMap;
+ // Map from PHINodes to their alloca (after demotion) counterpart.
+ DenseMap<PHINode *, AllocaInst *> PNallocMap;
+
+ // Scan the PHINodes in this function and categorize them to be either:
+ // o Preserved, if they are (canonical) induction variables or can be
+ // synthesized during code generation ('SCEVable')
+ // o Deleted, if they are trivial PHI nodes (one incoming value) and the
+ // incoming value is a PHI node we will demote
+ // o Demoted, if they do not fit any of the previous categories
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI)
for (BasicBlock::iterator II = BI->begin(), IE = BI->getFirstNonPHI();
II != IE; ++II) {
@@ -98,7 +131,7 @@ bool CodePreparation::eliminatePHINodes(Function &F) {
if (SE->isSCEVable(PN->getType())) {
const SCEV *S = SE->getSCEV(PN);
if (!isa<SCEVUnknown>(S) && !isa<SCEVCouldNotCompute>(S)) {
- PreservedPNs.push_back(PN);
+ PNtoPreserve.push_back(PN);
continue;
}
}
@@ -106,7 +139,7 @@ bool CodePreparation::eliminatePHINodes(Function &F) {
if (Loop *L = LI->getLoopFor(BI)) {
// Induction variables will be preserved.
if (L->getCanonicalInductionVariable() == PN) {
- PreservedPNs.push_back(PN);
+ PNtoPreserve.push_back(PN);
continue;
}
}
@@ -114,26 +147,58 @@ bool CodePreparation::eliminatePHINodes(Function &F) {
// As DemotePHIToStack does not support invoke edges, we preserve
// PHINodes that have invoke edges.
- if (hasInvokeEdge(PN))
- PreservedPNs.push_back(PN);
- else
- PNtoDelete.push_back(PN);
+ if (hasInvokeEdge(PN)) {
+ PNtoPreserve.push_back(PN);
+ } else {
+ if (PN->getNumIncomingValues() == 1)
+ PNtoDelete.push_back(PN);
+ else
+ PNtoDemote.push_back(PN);
+ }
}
- if (PNtoDelete.empty())
+ if (PNtoDemote.empty() && PNtoDelete.empty())
return false;
- while (!PNtoDelete.empty()) {
- PHINode *PN = PNtoDelete.back();
- PNtoDelete.pop_back();
+ while (!PNtoDemote.empty()) {
+ PHINode *PN = PNtoDemote.back();
+ PNtoDemote.pop_back();
+ DemotePHI(PN, PNallocMap, ValueLocToPhiMap);
+ }
+
+ // For each trivial PHI we encountered (and we want to delete) we try to find
+ // the value it will hold in a alloca we already created by PHI demotion. If
+ // we succeed (the incoming value is stored in an alloca at the predecessor
+ // block), we can replace the trivial PHI by the value stored in the alloca.
+ // If not, we will demote this trivial PHI as any other one.
+ for (auto PNIt = PNtoDelete.rbegin(), PNEnd = PNtoDelete.rend();
+ PNIt != PNEnd; ++PNIt) {
+ PHINode *TrivPN = *PNIt;
+ assert(TrivPN->getNumIncomingValues() == 1 && "Assumed trivial PHI");
+
+ auto *InVal = TrivPN->getIncomingValue(0);
+ auto *InBB = TrivPN->getIncomingBlock(0);
+ const auto &ValLocIt = ValueLocToPhiMap.find(std::make_pair(InVal, InBB));
+ if (ValLocIt != ValueLocToPhiMap.end()) {
+ PHINode *InPHI = ValLocIt->second;
+ assert(PNallocMap.count(InPHI) &&
+ "Inconsitent state, PN was not demoted!");
+ auto *InPHIAlloca = PNallocMap[InPHI];
+ PNallocMap[TrivPN] = InPHIAlloca;
+ LoadInst *LI = new LoadInst(InPHIAlloca, "",
+ TrivPN->getParent()->getFirstInsertionPt());
+ TrivPN->replaceAllUsesWith(LI);
+ TrivPN->eraseFromParent();
+ continue;
+ }
- DemotePHIToStack(PN);
+ DemotePHI(TrivPN, PNallocMap, ValueLocToPhiMap);
}
// Move preserved PHINodes to the beginning of the BasicBlock.
- while (!PreservedPNs.empty()) {
- PHINode *PN = PreservedPNs.back();
- PreservedPNs.pop_back();
+ while (!PNtoPreserve.empty()) {
+ PHINode *PN = PNtoPreserve.back();
+ PNtoPreserve.pop_back();
BasicBlock *BB = PN->getParent();
if (PN == BB->begin())
diff --git a/polly/test/CodePreparation/multiple_loops_trivial_phis.ll b/polly/test/CodePreparation/multiple_loops_trivial_phis.ll
new file mode 100644
index 00000000000..f8964aec4f0
--- /dev/null
+++ b/polly/test/CodePreparation/multiple_loops_trivial_phis.ll
@@ -0,0 +1,56 @@
+; RUN: opt %loadPolly -S -polly-prepare < %s | FileCheck %s
+; ModuleID = 'multiple_loops_trivial_phis.ll'
+;
+; int f(int * __restrict__ A) {
+; int i, j, sum = 0;
+; for (i = 0; i < 100; i++) {
+; sum *= 2;
+; for (j = 0; j < 100; j++) {
+; sum += A[i+j];
+; }
+; }
+; return sum;
+; }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @f(i32* noalias %A) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc5
+ %sum.04 = phi i32 [ 0, %entry ], [ %add4.lcssa, %for.inc5 ]
+ %indvars.iv23 = phi i64 [ 0, %entry ], [ %2, %for.inc5 ]
+ %mul = shl nsw i32 %sum.04, 1
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %for.inc
+ %sum.12 = phi i32 [ %mul, %for.body ], [ %add4, %for.inc ]
+ %indvars.iv1 = phi i64 [ 0, %for.body ], [ %1, %for.inc ]
+ %0 = add i64 %indvars.iv23, %indvars.iv1
+ %arrayidx = getelementptr i32* %A, i64 %0
+ %tmp5 = load i32* %arrayidx, align 4
+ %add4 = add nsw i32 %tmp5, %sum.12
+ %1 = add nuw nsw i64 %indvars.iv1, 1
+ %exitcond5 = icmp eq i64 %1, 100
+ br i1 %exitcond5, label %for.inc5, label %for.inc
+
+for.inc5: ; preds = %for.inc
+ %add4.lcssa = phi i32 [ %add4, %for.inc ]
+ %2 = add nuw nsw i64 %indvars.iv23, 1
+ %exitcond = icmp eq i64 %2, 100
+ br i1 %exitcond, label %for.end7, label %for.body
+
+for.end7: ; preds = %for.inc5
+ %add4.lcssa.lcssa = phi i32 [ %add4.lcssa, %for.inc5 ]
+ ret i32 %add4.lcssa.lcssa
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; Verify that only two allocas are created. (instead of 4!)
+; CHECK: alloca
+; CHECK: alloca
+; CHECK-NOT: alloca
diff --git a/polly/test/CodePreparation/single_loop_trivial_phi.ll b/polly/test/CodePreparation/single_loop_trivial_phi.ll
new file mode 100644
index 00000000000..e404da54f22
--- /dev/null
+++ b/polly/test/CodePreparation/single_loop_trivial_phi.ll
@@ -0,0 +1,50 @@
+; RUN: opt %loadPolly -S -polly-prepare < %s | FileCheck %s
+; ModuleID = 'single_loop_trivial_phi.ll'
+;
+; int f(int *A, int N) {
+; int i, sum = 0;
+; for (i = 0; i < N; i++)
+; sum += A[i];
+; return sum;
+; }
+; ModuleID = 'stack-slots.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @f(i32* %A, i32 %N) #0 {
+entry:
+ %cmp1 = icmp sgt i32 %N, 0
+ br i1 %cmp1, label %for.inc.lr.ph, label %for.end
+
+for.inc.lr.ph: ; preds = %entry
+ %0 = zext i32 %N to i64
+ br label %for.inc
+
+for.inc: ; preds = %for.inc.lr.ph, %for.inc
+ %sum.03 = phi i32 [ 0, %for.inc.lr.ph ], [ %add, %for.inc ]
+ %indvars.iv2 = phi i64 [ 0, %for.inc.lr.ph ], [ %indvars.iv.next, %for.inc ]
+ %arrayidx = getelementptr i32* %A, i64 %indvars.iv2
+ %tmp1 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %tmp1, %sum.03
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, %0
+ br i1 %exitcond, label %for.inc, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.inc
+ %add.lcssa = phi i32 [ %add, %for.inc ]
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ %sum.0.lcssa = phi i32 [ %add.lcssa, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+ ret i32 %sum.0.lcssa
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; Verify that only two allocas are created.
+; Both are needed for the %sum.0 PHI node and none should be created for the
+; %sum.0.lcssa PHI node
+; CHECK: alloca
+; CHECK: alloca
+; CHECK-NOT: alloca
OpenPOWER on IntegriCloud