diff options
| author | Nadav Rotem <nrotem@apple.com> | 2013-06-20 17:41:45 +0000 | 
|---|---|---|
| committer | Nadav Rotem <nrotem@apple.com> | 2013-06-20 17:41:45 +0000 | 
| commit | 14a89c5428666eac2659caa73182691e9a88bcf9 (patch) | |
| tree | 35cebe5d8366e8b29e51028941cfa3adf5176a8d /llvm | |
| parent | 8300e1299116ed89ae81609d453756cfeb4084d3 (diff) | |
| download | bcm5719-llvm-14a89c5428666eac2659caa73182691e9a88bcf9.tar.gz bcm5719-llvm-14a89c5428666eac2659caa73182691e9a88bcf9.zip  | |
SLPVectorization:  Add a basic support for cross-basic block slp vectorization.
We collect gather sequences when we vectorize basic blocks. Gather sequences are excellent
hints for vectorization of other basic blocks.
llvm-svn: 184444
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 80 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VecUtils.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VecUtils.h | 7 | ||||
| -rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll | 54 | 
4 files changed, 134 insertions, 13 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2b07fa2c015..91c04633244 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -99,7 +99,10 @@ struct SLPVectorizer : public FunctionPass {        }        // Try to hoist some of the scalarization code to the preheader. -      if (BBChanged) hoistGatherSequence(LI, BB, R); +      if (BBChanged) { +        hoistGatherSequence(LI, BB, R); +        Changed |= vectorizeUsingGatherHints(R.getGatherSeqInstructions()); +      }        Changed |= BBChanged;      } @@ -130,8 +133,10 @@ private:    /// \brief Try to vectorize a chain that starts at two arithmetic instrs.    bool tryToVectorizePair(Value *A, Value *B,  BoUpSLP &R); -  /// \brief Try to vectorize a list of operands. -  bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R); +  /// \brief Try to vectorize a list of operands. If \p NeedExtracts is true +  /// then we calculate the cost of extracting the scalars from the vector. +  /// \returns true if a value was vectorized. +  bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool NeedExtracts);    /// \brief Try to vectorize a chain that may start at the operands of \V;    bool tryToVectorize(BinaryOperator *V,  BoUpSLP &R); @@ -143,6 +148,13 @@ private:    /// all of the sources are loop invariant.    void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R); +  /// \brief Try to vectorize additional sequences in different basic blocks +  /// based on values that we gathered in previous blocks. The list \p Gathers +  /// holds the gather InsertElement instructions that were generated during +  /// vectorization. +  /// \returns True if some code was vectorized. +  bool vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers); +    /// \brief Scan the basic block and look for patterns that are likely to start    /// a vectorization chain.    bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R); @@ -179,10 +191,11 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {  bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B,  BoUpSLP &R) {    if (!A || !B) return false;    Value *VL[] = { A, B }; -  return tryToVectorizeList(VL, R); +  return tryToVectorizeList(VL, R, true);  } -bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) { +bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, +                                       bool NeedExtracts) {    if (VL.size() < 2)      return false; @@ -204,7 +217,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {    }    int Cost = R.getTreeCost(VL); -  int ExtrCost = R.getScalarizationCost(VL); +  int ExtrCost =  NeedExtracts ? R.getScalarizationCost(VL) : 0;    DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<          " Cost of extract:" << ExtrCost << ".\n");    if ((Cost+ExtrCost) >= -SLPCostThreshold) return false; @@ -307,7 +320,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {      }      if (Incoming.size() > 1) -      Changed |= tryToVectorizeList(Incoming, R); +      Changed |= tryToVectorizeList(Incoming, R, true);    }    return Changed; @@ -329,6 +342,51 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {    return Changed;  } +bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) { +  SmallVector<Value*, 4> Seq; +  bool Changed = false; +  for (int i = 0, e = Gathers.size(); i < e; ++i) { +    InsertElementInst *IEI = dyn_cast_or_null<InsertElementInst>(Gathers[i]); + +    if (IEI) { +      if (Instruction *I = dyn_cast<Instruction>(IEI->getOperand(1))) +        Seq.push_back(I); +    } else { + +      if (!Seq.size()) +        continue; + +      Instruction *I = cast<Instruction>(Seq[0]); +      BasicBlock *BB = I->getParent(); + +      DEBUG(dbgs()<<"SLP: Inspecting a gather list of size " << Seq.size() << +            " in " << BB->getName() << ".\n"); + +      // Check if the gathered values have multiple uses. If they only have one +      // user then we know that the insert/extract pair will go away. +      bool HasMultipleUsers = false; +      for (int i=0; e = Seq.size(), i < e; ++i) { +        if (!Seq[i]->hasOneUse()) { +          HasMultipleUsers = true; +          break; +        } +      } + +      BoUpSLP BO(BB, SE, DL, TTI, AA, LI->getLoopFor(BB)); + +      if (tryToVectorizeList(Seq, BO, HasMultipleUsers)) { +        DEBUG(dbgs()<<"SLP: Vectorized a gather list of len " << Seq.size() << +              " in " << BB->getName() << ".\n"); +        Changed = true; +      } + +      Seq.clear(); +    } +  } + +  return Changed; +} +  void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,                                          BoUpSLP &R) {    // Check if this block is inside a loop. @@ -344,12 +402,14 @@ void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,    // Mark the insertion point for the block.    Instruction *Location = PreHeader->getTerminator(); -  BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions(); -  for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end(); +  BoUpSLP::InstrList &Gathers = R.getGatherSeqInstructions(); +  for (BoUpSLP::InstrList::iterator it = Gathers.begin(), e = Gathers.end();         it != e; ++it) { -    InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it); +    InsertElementInst *Insert = dyn_cast_or_null<InsertElementInst>(*it);      // The InsertElement sequence can be simplified into a constant. +    // Also Ignore NULL pointers because they are only here to separate +    // sequences.      if (!Insert)        continue; diff --git a/llvm/lib/Transforms/Vectorize/VecUtils.cpp b/llvm/lib/Transforms/Vectorize/VecUtils.cpp index e79f08a56dd..88c457d1176 100644 --- a/llvm/lib/Transforms/Vectorize/VecUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VecUtils.cpp @@ -731,9 +731,13 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {      // Remember that this instruction is used as part of a 'gather' sequence.      // The caller of the bottom-up slp vectorizer can try to hoist the sequence      // if the users are outside of the basic block. -    GatherInstructions.push_back(Vec); +    if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Vec)) +      GatherInstructions.push_back(IEI);    } +  // Mark the end of the gather sequence. +  GatherInstructions.push_back(0); +    for (unsigned i = 0; i < Ty->getNumElements(); ++i)      VectorizedValues[VL[i]] = Vec; diff --git a/llvm/lib/Transforms/Vectorize/VecUtils.h b/llvm/lib/Transforms/Vectorize/VecUtils.h index f76ae8413d7..28a61e3c0dd 100644 --- a/llvm/lib/Transforms/Vectorize/VecUtils.h +++ b/llvm/lib/Transforms/Vectorize/VecUtils.h @@ -34,6 +34,7 @@ class Loop;  /// Bottom Up SLP vectorization utility class.  struct BoUpSLP  {    typedef SmallVector<Value*, 8> ValueList; +  typedef SmallVector<Instruction*, 16> InstrList;    typedef SmallPtrSet<Value*, 16> ValueSet;    typedef SmallVector<StoreInst*, 8> StoreList;    static const int max_cost = 1<<20; @@ -78,7 +79,7 @@ struct BoUpSLP  {    /// \returns the list of new instructions that were added in order to collect    /// scalars into vectors. This list can be used to further optimize the gather    /// sequences. -  ValueList &getGatherSeqInstructions() {return GatherInstructions; } +  InstrList &getGatherSeqInstructions() {return GatherInstructions; }  private:    /// \brief This method contains the recursive part of getTreeCost. @@ -166,7 +167,9 @@ private:    /// A list of instructions that are used when gathering scalars into vectors.    /// In many cases these instructions can be hoisted outside of the BB.    /// Iterating over this list is faster than calling LICM. -  ValueList GatherInstructions; +  /// Notice: We insert NULL ptrs to separate between the different gather +  /// sequences. +   InstrList GatherInstructions;    /// Instruction builder to construct the vectorized tree.    IRBuilder<> Builder; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll b/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll new file mode 100644 index 00000000000..8246453519b --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; int foo(double *A, float *B, int g) { +;   float B0 = B[0]; +;   float B1 = B[1]; <----- BasicBlock #1 +;   B0 += 5; +;   B1 += 8; +; +;   if (g) bar(); +; +;   A[0] += B0;     <------- BasicBlock #3 +;   A[1] += B1; +; } + + +;CHECK: @foo +;CHECK: load <2 x float> +;CHECK: fadd <2 x float> +;CHECK: call i32 +;CHECK: load <2 x double> +;CHECK: fadd <2 x double> +;CHECK: store <2 x double> +;CHECK: ret +define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) { +entry: +  %0 = load float* %B, align 4 +  %arrayidx1 = getelementptr inbounds float* %B, i64 1 +  %1 = load float* %arrayidx1, align 4 +  %add = fadd float %0, 5.000000e+00 +  %add2 = fadd float %1, 8.000000e+00 +  %tobool = icmp eq i32 %g, 0 +  br i1 %tobool, label %if.end, label %if.then + +if.then: +  %call = tail call i32 (...)* @bar() +  br label %if.end + +if.end: +  %conv = fpext float %add to double +  %2 = load double* %A, align 8 +  %add4 = fadd double %conv, %2 +  store double %add4, double* %A, align 8 +  %conv5 = fpext float %add2 to double +  %arrayidx6 = getelementptr inbounds double* %A, i64 1 +  %3 = load double* %arrayidx6, align 8 +  %add7 = fadd double %conv5, %3 +  store double %add7, double* %arrayidx6, align 8 +  ret i32 undef +} + +declare i32 @bar(...)  | 

