diff options
-rw-r--r-- | polly/lib/Transform/CodePreparation.cpp | 101 | ||||
-rw-r--r-- | polly/test/CodePreparation/multiple_loops_trivial_phis.ll | 56 | ||||
-rw-r--r-- | polly/test/CodePreparation/single_loop_trivial_phi.ll | 50 |
3 files changed, 189 insertions, 18 deletions
diff --git a/polly/lib/Transform/CodePreparation.cpp b/polly/lib/Transform/CodePreparation.cpp index b0032498a8e..0989fe5f75b 100644 --- a/polly/lib/Transform/CodePreparation.cpp +++ b/polly/lib/Transform/CodePreparation.cpp @@ -49,6 +49,28 @@ using namespace llvm; using namespace polly; namespace { + +// Helper function which (for a given PHI node): +// +// 1) Remembers all incoming values and the associated basic blocks +// 2) Demotes the phi node to the stack +// 3) Remembers the correlation between the PHI node and the new alloca +// +// When we combine the information from 1) and 3) we know the values stored +// in this alloca at the end of the predecessor basic blocks of the PHI. +static void DemotePHI( + PHINode *PN, DenseMap<PHINode *, AllocaInst *> &PNallocMap, + DenseMap<std::pair<Value *, BasicBlock *>, PHINode *> &ValueLocToPhiMap) { + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + auto *InVal = PN->getIncomingValue(i); + auto *InBB = PN->getIncomingBlock(i); + ValueLocToPhiMap[std::make_pair(InVal, InBB)] = PN; + } + + PNallocMap[PN] = DemotePHIToStack(PN); +} + /// @brief Prepare the IR for the scop detection. /// class CodePreparation : public FunctionPass { @@ -84,12 +106,23 @@ void CodePreparation::clear() {} CodePreparation::~CodePreparation() { clear(); } bool CodePreparation::eliminatePHINodes(Function &F) { - // The PHINodes that will be deleted. + // The PHINodes that will be demoted. + std::vector<PHINode *> PNtoDemote; + // The PHINodes that will be deleted (stack slot sharing). std::vector<PHINode *> PNtoDelete; // The PHINodes that will be preserved. - std::vector<PHINode *> PreservedPNs; - - // Scan the PHINodes in this function. + std::vector<PHINode *> PNtoPreserve; + // Map to remember values stored in PHINodes at the end of basic blocks. + DenseMap<std::pair<Value *, BasicBlock *>, PHINode *> ValueLocToPhiMap; + // Map from PHINodes to their alloca (after demotion) counterpart. + DenseMap<PHINode *, AllocaInst *> PNallocMap; + + // Scan the PHINodes in this function and categorize them to be either: + // o Preserved, if they are (canonical) induction variables or can be + // synthesized during code generation ('SCEVable') + // o Deleted, if they are trivial PHI nodes (one incoming value) and the + // incoming value is a PHI node we will demote + // o Demoted, if they do not fit any of the previous categories for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) for (BasicBlock::iterator II = BI->begin(), IE = BI->getFirstNonPHI(); II != IE; ++II) { @@ -98,7 +131,7 @@ bool CodePreparation::eliminatePHINodes(Function &F) { if (SE->isSCEVable(PN->getType())) { const SCEV *S = SE->getSCEV(PN); if (!isa<SCEVUnknown>(S) && !isa<SCEVCouldNotCompute>(S)) { - PreservedPNs.push_back(PN); + PNtoPreserve.push_back(PN); continue; } } @@ -106,7 +139,7 @@ bool CodePreparation::eliminatePHINodes(Function &F) { if (Loop *L = LI->getLoopFor(BI)) { // Induction variables will be preserved. if (L->getCanonicalInductionVariable() == PN) { - PreservedPNs.push_back(PN); + PNtoPreserve.push_back(PN); continue; } } @@ -114,26 +147,58 @@ bool CodePreparation::eliminatePHINodes(Function &F) { // As DemotePHIToStack does not support invoke edges, we preserve // PHINodes that have invoke edges. - if (hasInvokeEdge(PN)) - PreservedPNs.push_back(PN); - else - PNtoDelete.push_back(PN); + if (hasInvokeEdge(PN)) { + PNtoPreserve.push_back(PN); + } else { + if (PN->getNumIncomingValues() == 1) + PNtoDelete.push_back(PN); + else + PNtoDemote.push_back(PN); + } } - if (PNtoDelete.empty()) + if (PNtoDemote.empty() && PNtoDelete.empty()) return false; - while (!PNtoDelete.empty()) { - PHINode *PN = PNtoDelete.back(); - PNtoDelete.pop_back(); + while (!PNtoDemote.empty()) { + PHINode *PN = PNtoDemote.back(); + PNtoDemote.pop_back(); + DemotePHI(PN, PNallocMap, ValueLocToPhiMap); + } + + // For each trivial PHI we encountered (and we want to delete) we try to find + // the value it will hold in a alloca we already created by PHI demotion. If + // we succeed (the incoming value is stored in an alloca at the predecessor + // block), we can replace the trivial PHI by the value stored in the alloca. + // If not, we will demote this trivial PHI as any other one. + for (auto PNIt = PNtoDelete.rbegin(), PNEnd = PNtoDelete.rend(); + PNIt != PNEnd; ++PNIt) { + PHINode *TrivPN = *PNIt; + assert(TrivPN->getNumIncomingValues() == 1 && "Assumed trivial PHI"); + + auto *InVal = TrivPN->getIncomingValue(0); + auto *InBB = TrivPN->getIncomingBlock(0); + const auto &ValLocIt = ValueLocToPhiMap.find(std::make_pair(InVal, InBB)); + if (ValLocIt != ValueLocToPhiMap.end()) { + PHINode *InPHI = ValLocIt->second; + assert(PNallocMap.count(InPHI) && + "Inconsitent state, PN was not demoted!"); + auto *InPHIAlloca = PNallocMap[InPHI]; + PNallocMap[TrivPN] = InPHIAlloca; + LoadInst *LI = new LoadInst(InPHIAlloca, "", + TrivPN->getParent()->getFirstInsertionPt()); + TrivPN->replaceAllUsesWith(LI); + TrivPN->eraseFromParent(); + continue; + } - DemotePHIToStack(PN); + DemotePHI(TrivPN, PNallocMap, ValueLocToPhiMap); } // Move preserved PHINodes to the beginning of the BasicBlock. - while (!PreservedPNs.empty()) { - PHINode *PN = PreservedPNs.back(); - PreservedPNs.pop_back(); + while (!PNtoPreserve.empty()) { + PHINode *PN = PNtoPreserve.back(); + PNtoPreserve.pop_back(); BasicBlock *BB = PN->getParent(); if (PN == BB->begin()) diff --git a/polly/test/CodePreparation/multiple_loops_trivial_phis.ll b/polly/test/CodePreparation/multiple_loops_trivial_phis.ll new file mode 100644 index 00000000000..f8964aec4f0 --- /dev/null +++ b/polly/test/CodePreparation/multiple_loops_trivial_phis.ll @@ -0,0 +1,56 @@ +; RUN: opt %loadPolly -S -polly-prepare < %s | FileCheck %s +; ModuleID = 'multiple_loops_trivial_phis.ll' +; +; int f(int * __restrict__ A) { +; int i, j, sum = 0; +; for (i = 0; i < 100; i++) { +; sum *= 2; +; for (j = 0; j < 100; j++) { +; sum += A[i+j]; +; } +; } +; return sum; +; } + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define i32 @f(i32* noalias %A) #0 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.inc5 + %sum.04 = phi i32 [ 0, %entry ], [ %add4.lcssa, %for.inc5 ] + %indvars.iv23 = phi i64 [ 0, %entry ], [ %2, %for.inc5 ] + %mul = shl nsw i32 %sum.04, 1 + br label %for.inc + +for.inc: ; preds = %for.body, %for.inc + %sum.12 = phi i32 [ %mul, %for.body ], [ %add4, %for.inc ] + %indvars.iv1 = phi i64 [ 0, %for.body ], [ %1, %for.inc ] + %0 = add i64 %indvars.iv23, %indvars.iv1 + %arrayidx = getelementptr i32* %A, i64 %0 + %tmp5 = load i32* %arrayidx, align 4 + %add4 = add nsw i32 %tmp5, %sum.12 + %1 = add nuw nsw i64 %indvars.iv1, 1 + %exitcond5 = icmp eq i64 %1, 100 + br i1 %exitcond5, label %for.inc5, label %for.inc + +for.inc5: ; preds = %for.inc + %add4.lcssa = phi i32 [ %add4, %for.inc ] + %2 = add nuw nsw i64 %indvars.iv23, 1 + %exitcond = icmp eq i64 %2, 100 + br i1 %exitcond, label %for.end7, label %for.body + +for.end7: ; preds = %for.inc5 + %add4.lcssa.lcssa = phi i32 [ %add4.lcssa, %for.inc5 ] + ret i32 %add4.lcssa.lcssa +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +; Verify that only two allocas are created. (instead of 4!) +; CHECK: alloca +; CHECK: alloca +; CHECK-NOT: alloca diff --git a/polly/test/CodePreparation/single_loop_trivial_phi.ll b/polly/test/CodePreparation/single_loop_trivial_phi.ll new file mode 100644 index 00000000000..e404da54f22 --- /dev/null +++ b/polly/test/CodePreparation/single_loop_trivial_phi.ll @@ -0,0 +1,50 @@ +; RUN: opt %loadPolly -S -polly-prepare < %s | FileCheck %s +; ModuleID = 'single_loop_trivial_phi.ll' +; +; int f(int *A, int N) { +; int i, sum = 0; +; for (i = 0; i < N; i++) +; sum += A[i]; +; return sum; +; } +; ModuleID = 'stack-slots.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define i32 @f(i32* %A, i32 %N) #0 { +entry: + %cmp1 = icmp sgt i32 %N, 0 + br i1 %cmp1, label %for.inc.lr.ph, label %for.end + +for.inc.lr.ph: ; preds = %entry + %0 = zext i32 %N to i64 + br label %for.inc + +for.inc: ; preds = %for.inc.lr.ph, %for.inc + %sum.03 = phi i32 [ 0, %for.inc.lr.ph ], [ %add, %for.inc ] + %indvars.iv2 = phi i64 [ 0, %for.inc.lr.ph ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr i32* %A, i64 %indvars.iv2 + %tmp1 = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp1, %sum.03 + %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1 + %exitcond = icmp ne i64 %indvars.iv.next, %0 + br i1 %exitcond, label %for.inc, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.inc + %add.lcssa = phi i32 [ %add, %for.inc ] + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + %sum.0.lcssa = phi i32 [ %add.lcssa, %for.cond.for.end_crit_edge ], [ 0, %entry ] + ret i32 %sum.0.lcssa +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +; Verify that only two allocas are created. +; Both are needed for the %sum.0 PHI node and none should be created for the +; %sum.0.lcssa PHI node +; CHECK: alloca +; CHECK: alloca +; CHECK-NOT: alloca |