1 files changed, 124 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 66333220971..dd50db346b0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -60,6 +60,7 @@ class Value;
 class VPBasicBlock;
 class VPRegionBlock;
 class VPlan;
+class VPlanSlp;
 
 /// A range of powers-of-2 vectorization factors with fixed start and
 /// adjustable end. The range includes start and excludes end, e.g.,:
@@ -609,10 +610,16 @@ public:
 /// the VPInstruction is also a single def-use vertex.
 class VPInstruction : public VPUser, public VPRecipeBase {
   friend class VPlanHCFGTransforms;
+  friend class VPlanSlp;
 
 public:
   /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
-  enum { Not = Instruction::OtherOpsEnd + 1, ICmpULE };
+  enum {
+    Not = Instruction::OtherOpsEnd + 1,
+    ICmpULE,
+    SLPLoad,
+    SLPStore,
+  };
 
 private:
   typedef unsigned char OpcodeTy;
@@ -622,6 +629,13 @@ private:
   /// modeled instruction.
   void generateInstruction(VPTransformState &State, unsigned Part);
 
+protected:
+  Instruction *getUnderlyingInstr() {
+    return cast_or_null<Instruction>(getUnderlyingValue());
+  }
+
+  void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
+
 public:
   VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
       : VPUser(VPValue::VPInstructionSC, Operands),
@@ -635,6 +649,11 @@ public:
     return V->getVPValueID() == VPValue::VPInstructionSC;
   }
 
+  VPInstruction *clone() const {
+    SmallVector<VPValue *, 2> Operands(operands());
+    return new VPInstruction(Opcode, Operands);
+  }
+
   /// Method to support type inquiry through isa, cast, and dyn_cast.
   static inline bool classof(const VPRecipeBase *R) {
     return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
@@ -652,6 +671,14 @@ public:
 
   /// Print the VPInstruction.
   void print(raw_ostream &O) const;
+
+  /// Return true if this instruction may modify memory.
+  bool mayWriteToMemory() const {
+    // TODO: we can use attributes of the called function to rule out memory
+    //       modifications.
+    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
+           Opcode == Instruction::Invoke || Opcode == SLPStore;
+  }
 };
 
 /// VPWidenRecipe is a recipe for producing a copy of vector type for each
@@ -1508,6 +1535,102 @@ public:
   }
 };
 
+/// Class that maps (parts of) an existing VPlan to trees of combined
+/// VPInstructions.
+class VPlanSlp {
+private:
+  enum class OpMode { Failed, Load, Opcode };
+
+  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
+  /// DenseMap keys.
+  struct BundleDenseMapInfo {
+    static SmallVector<VPValue *, 4> getEmptyKey() {
+      return {reinterpret_cast<VPValue *>(-1)};
+    }
+
+    static SmallVector<VPValue *, 4> getTombstoneKey() {
+      return {reinterpret_cast<VPValue *>(-2)};
+    }
+
+    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
+      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
+    }
+
+    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
+                        const SmallVector<VPValue *, 4> &RHS) {
+      return LHS == RHS;
+    }
+  };
+
+  /// Mapping of values in the original VPlan to a combined VPInstruction.
+  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
+      BundleToCombined;
+
+  VPInterleavedAccessInfo &IAI;
+
+  /// Basic block to operate on. For now, only instructions in a single BB are
+  /// considered.
+  const VPBasicBlock &BB;
+
+  /// Indicates whether we managed to combine all visited instructions or not.
+  bool CompletelySLP = true;
+
+  /// Width of the widest combined bundle in bits.
+  unsigned WidestBundleBits = 0;
+
+  using MultiNodeOpTy =
+      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
+
+  // Input operand bundles for the current multi node. Each multi node operand
+  // bundle contains values not matching the multi node's opcode. They will
+  // be reordered in reorderMultiNodeOps, once we completed building a
+  // multi node.
+  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
+
+  /// Indicates whether we are building a multi node currently.
+  bool MultiNodeActive = false;
+
+  /// Check if we can vectorize Operands together.
+  bool areVectorizable(ArrayRef<VPValue *> Operands) const;
+
+  /// Add combined instruction \p New for the bundle \p Operands.
+  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
+
+  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
+  VPInstruction *markFailed();
+
+  /// Reorder operands in the multi node to maximize sequential memory access
+  /// and commutative operations.
+  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
+
+  /// Choose the best candidate to use for the lane after \p Last. The set of
+  /// candidates to choose from are values with an opcode matching \p Last's
+  /// or loads consecutive to \p Last.
+  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
+                                       SmallVectorImpl<VPValue *> &Candidates,
+                                       VPInterleavedAccessInfo &IAI);
+
+  /// Print bundle \p Values to dbgs().
+  void dumpBundle(ArrayRef<VPValue *> Values);
+
+public:
+  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
+
+  ~VPlanSlp() {
+    for (auto &KV : BundleToCombined)
+      delete KV.second;
+  }
+
+  /// Tries to build an SLP tree rooted at \p Operands and returns a
+  /// VPInstruction combining \p Operands, if they can be combined.
+  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
+
+  /// Return the width of the widest combined bundle in bits.
+  unsigned getWidestBundleBits() const { return WidestBundleBits; }
+
+  /// Return true if all visited instruction can be combined.
+  bool isCompletelySLP() const { return CompletelySLP; }
+};
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H