diff options
| author | Cameron Zwarich <zwarich@apple.com> | 2011-01-17 17:38:41 +0000 | 
|---|---|---|
| committer | Cameron Zwarich <zwarich@apple.com> | 2011-01-17 17:38:41 +0000 | 
| commit | b410858a5f15014ae2f8c66ed77ed0fc87f27f37 (patch) | |
| tree | 045fe2436d4c3681191471a1f46672337073c7e0 /llvm/lib | |
| parent | ea49cb04a53acdb11de8c3258c8a0011368c0ee9 (diff) | |
| download | bcm5719-llvm-b410858a5f15014ae2f8c66ed77ed0fc87f27f37.tar.gz bcm5719-llvm-b410858a5f15014ae2f8c66ed77ed0fc87f27f37.zip  | |
Roll r123609 back in with two changes that fix test failures with expensive
checks enabled:
1) Use '<' to compare integers in a comparison function rather than '<='.
2) Use the uniqued set DefBlocks rather than Info.DefiningBlocks to initialize
the priority queue.
The speedup of scalarrepl on test-suite + SPEC2000 + SPEC2006 is a bit less, at
just under 16% rather than 17%.
llvm-svn: 123662
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp | 9 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/Mem2Reg.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 169 | 
3 files changed, 122 insertions, 61 deletions
diff --git a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index a068556b734..5c90a360237 100644 --- a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -152,7 +152,6 @@ namespace {      // will not alter the CFG, so say so.      virtual void getAnalysisUsage(AnalysisUsage &AU) const {        AU.addRequired<DominatorTree>(); -      AU.addRequired<DominanceFrontier>();        AU.setPreservesCFG();      }    }; @@ -180,7 +179,6 @@ char SROA_SSAUp::ID = 0;  INITIALIZE_PASS_BEGIN(SROA_DF, "scalarrepl",                  "Scalar Replacement of Aggregates (DF)", false, false)  INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)  INITIALIZE_PASS_END(SROA_DF, "scalarrepl",                  "Scalar Replacement of Aggregates (DF)", false, false) @@ -877,11 +875,8 @@ public:  bool SROA::performPromotion(Function &F) {    std::vector<AllocaInst*> Allocas;    DominatorTree *DT = 0; -  DominanceFrontier *DF = 0; -  if (HasDomFrontiers) { +  if (HasDomFrontiers)      DT = &getAnalysis<DominatorTree>(); -    DF = &getAnalysis<DominanceFrontier>(); -  }    BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function @@ -900,7 +895,7 @@ bool SROA::performPromotion(Function &F) {      if (Allocas.empty()) break;      if (HasDomFrontiers) -      PromoteMemToReg(Allocas, *DT, *DF); +      PromoteMemToReg(Allocas, *DT);      else {        SSAUpdater SSA;        for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { diff --git a/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/llvm/lib/Transforms/Utils/Mem2Reg.cpp index 2b1364c81e7..ea2f8fca5f2 100644 --- a/llvm/lib/Transforms/Utils/Mem2Reg.cpp +++ b/llvm/lib/Transforms/Utils/Mem2Reg.cpp @@ -40,7 +40,6 @@ namespace {      //      virtual void getAnalysisUsage(AnalysisUsage &AU) const {        AU.addRequired<DominatorTree>(); -      AU.addRequired<DominanceFrontier>();        AU.setPreservesCFG();        // This is a cluster of orthogonal Transforms        AU.addPreserved<UnifyFunctionExitNodes>(); @@ -54,7 +53,6 @@ char PromotePass::ID = 0;  INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",                  false, false)  INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)  INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",                  false, false) @@ -66,7 +64,6 @@ bool PromotePass::runOnFunction(Function &F) {    bool Changed  = false;    DominatorTree &DT = getAnalysis<DominatorTree>(); -  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();    while (1) {      Allocas.clear(); @@ -80,7 +77,7 @@ bool PromotePass::runOnFunction(Function &F) {      if (Allocas.empty()) break; -    PromoteMemToReg(Allocas, DT, DF); +    PromoteMemToReg(Allocas, DT);      NumPromoted += Allocas.size();      Changed = true;    } diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index d00f870fdfa..82e565d1fb5 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -9,10 +9,19 @@  //  // This file promotes memory references to be register references.  It promotes  // alloca instructions which only have loads and stores as uses.  An alloca is -// transformed by using dominator frontiers to place PHI nodes, then traversing -// the function in depth-first order to rewrite loads and stores as appropriate. -// This is just the standard SSA construction algorithm to construct "pruned" -// SSA form. +// transformed by using iterated dominator frontiers to place PHI nodes, then +// traversing the function in depth-first order to rewrite loads and stores as +// appropriate. +// +// The algorithm used here is based on: +// +//   Sreedhar and Gao. A linear time algorithm for placing phi-nodes. +//   In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of +//   Programming Languages +//   POPL '95. ACM, New York, NY, 62-73. +// +// It has been modified to not explicitly use the DJ graph data structure and to +// directly compute pruned SSA using per-variable liveness information.  //  //===----------------------------------------------------------------------===// @@ -35,6 +44,7 @@  #include "llvm/ADT/STLExtras.h"  #include "llvm/Support/CFG.h"  #include <algorithm> +#include <queue>  using namespace llvm;  STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); @@ -179,7 +189,6 @@ namespace {      ///      std::vector<AllocaInst*> Allocas;      DominatorTree &DT; -    DominanceFrontier &DF;      DIFactory *DIF;      /// AST - An AliasSetTracker object to update.  If null, don't update it. @@ -217,12 +226,15 @@ namespace {      /// non-determinstic behavior.      DenseMap<BasicBlock*, unsigned> BBNumbers; +    /// DomLevels - Maps DomTreeNodes to their level in the dominator tree. +    DenseMap<DomTreeNode*, unsigned> DomLevels; +      /// BBNumPreds - Lazily compute the number of predecessors a block has.      DenseMap<const BasicBlock*, unsigned> BBNumPreds;    public:      PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, -                   DominanceFrontier &df, AliasSetTracker *ast) -      : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {} +                   AliasSetTracker *ast) +      : Allocas(A), DT(dt), DIF(0), AST(ast) {}      ~PromoteMem2Reg() {        delete DIF;      } @@ -325,11 +337,19 @@ namespace {        DbgDeclare = FindAllocaDbgDeclare(AI);      }    }; + +  typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair; + +  struct DomTreeNodeCompare { +    bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) { +      return LHS.second < RHS.second; +    } +  };  }  // end of anonymous namespace  void PromoteMem2Reg::run() { -  Function &F = *DF.getRoot()->getParent(); +  Function &F = *DT.getRoot()->getParent();    if (AST) PointerAllocaValues.resize(Allocas.size());    AllocaDbgDeclares.resize(Allocas.size()); @@ -422,7 +442,26 @@ void PromoteMem2Reg::run() {          continue;        }      } -     + +    // If we haven't computed dominator tree levels, do so now. +    if (DomLevels.empty()) { +      SmallVector<DomTreeNode*, 32> Worklist; + +      DomTreeNode *Root = DT.getRootNode(); +      DomLevels[Root] = 0; +      Worklist.push_back(Root); + +      while (!Worklist.empty()) { +        DomTreeNode *Node = Worklist.pop_back_val(); +        unsigned ChildLevel = DomLevels[Node] + 1; +        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); +             CI != CE; ++CI) { +          DomLevels[*CI] = ChildLevel; +          Worklist.push_back(*CI); +        } +      } +    } +      // If we haven't computed a numbering for the BB's in the function, do so      // now.      if (BBNumbers.empty()) { @@ -663,7 +702,6 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,  /// avoiding insertion of dead phi nodes.  void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,                                               AllocaInfo &Info) { -    // Unique the set of defining blocks for efficient lookup.    SmallPtrSet<BasicBlock*, 32> DefBlocks;    DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); @@ -673,46 +711,78 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,    SmallPtrSet<BasicBlock*, 32> LiveInBlocks;    ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); -  // Compute the locations where PhiNodes need to be inserted.  Look at the -  // dominance frontier of EACH basic-block we have a write in. -  unsigned CurrentVersion = 0; +  // Use a priority queue keyed on dominator tree level so that inserted nodes +  // are handled from the bottom of the dominator tree upwards. +  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, +                              DomTreeNodeCompare> IDFPriorityQueue; +  IDFPriorityQueue PQ; + +  for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(), +       E = DefBlocks.end(); I != E; ++I) { +    if (DomTreeNode *Node = DT.getNode(*I)) +      PQ.push(std::make_pair(Node, DomLevels[Node])); +  } +    std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks; -  while (!Info.DefiningBlocks.empty()) { -    BasicBlock *BB = Info.DefiningBlocks.back(); -    Info.DefiningBlocks.pop_back(); -     -    // Look up the DF for this write, add it to defining blocks. -    DominanceFrontier::const_iterator it = DF.find(BB); -    if (it == DF.end()) continue; -     -    const DominanceFrontier::DomSetType &S = it->second; -     -    // In theory we don't need the indirection through the DFBlocks vector. -    // In practice, the order of calling QueuePhiNode would depend on the -    // (unspecified) ordering of basic blocks in the dominance frontier, -    // which would give PHI nodes non-determinstic subscripts.  Fix this by -    // processing blocks in order of the occurance in the function. -    for (DominanceFrontier::DomSetType::const_iterator P = S.begin(), -         PE = S.end(); P != PE; ++P) { -      // If the frontier block is not in the live-in set for the alloca, don't -      // bother processing it. -      if (!LiveInBlocks.count(*P)) -        continue; -       -      DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P)); -    } -     -    // Sort by which the block ordering in the function. -    if (DFBlocks.size() > 1) -      std::sort(DFBlocks.begin(), DFBlocks.end()); -     -    for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) { -      BasicBlock *BB = DFBlocks[i].second; -      if (QueuePhiNode(BB, AllocaNum, CurrentVersion)) -        Info.DefiningBlocks.push_back(BB); +  SmallPtrSet<DomTreeNode*, 32> Visited; +  SmallVector<DomTreeNode*, 32> Worklist; +  while (!PQ.empty()) { +    DomTreeNodePair RootPair = PQ.top(); +    PQ.pop(); +    DomTreeNode *Root = RootPair.first; +    unsigned RootLevel = RootPair.second; + +    // Walk all dominator tree children of Root, inspecting their CFG edges with +    // targets elsewhere on the dominator tree. Only targets whose level is at +    // most Root's level are added to the iterated dominance frontier of the +    // definition set. + +    Worklist.clear(); +    Worklist.push_back(Root); + +    while (!Worklist.empty()) { +      DomTreeNode *Node = Worklist.pop_back_val(); +      BasicBlock *BB = Node->getBlock(); + +      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; +           ++SI) { +        DomTreeNode *SuccNode = DT.getNode(*SI); + +        // Quickly skip all CFG edges that are also dominator tree edges instead +        // of catching them below. +        if (SuccNode->getIDom() == Node) +          continue; + +        unsigned SuccLevel = DomLevels[SuccNode]; +        if (SuccLevel > RootLevel) +          continue; + +        if (!Visited.insert(SuccNode)) +          continue; + +        BasicBlock *SuccBB = SuccNode->getBlock(); +        if (!LiveInBlocks.count(SuccBB)) +          continue; + +        DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB)); +        if (!DefBlocks.count(SuccBB)) +          PQ.push(std::make_pair(SuccNode, SuccLevel)); +      } + +      for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; +           ++CI) { +        if (!Visited.count(*CI)) +          Worklist.push_back(*CI); +      }      } -    DFBlocks.clear();    } + +  if (DFBlocks.size() > 1) +    std::sort(DFBlocks.begin(), DFBlocks.end()); + +  unsigned CurrentVersion = 0; +  for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) +    QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);  }  /// RewriteSingleStoreAlloca - If there is only a single store to this value, @@ -1040,10 +1110,9 @@ NextIteration:  /// made to the IR.  ///  void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, -                           DominatorTree &DT, DominanceFrontier &DF, -                           AliasSetTracker *AST) { +                           DominatorTree &DT, AliasSetTracker *AST) {    // If there is nothing to do, bail out...    if (Allocas.empty()) return; -  PromoteMem2Reg(Allocas, DT, DF, AST).run(); +  PromoteMem2Reg(Allocas, DT, AST).run();  }  | 

