recommit 344472 after fixing build failure on ARM and PPC.

llvm-svn: 344475
author: Dorit Nuzman <dorit.nuzman@intel.com> 2018-10-14 08:50:06 +0000
committer: Dorit Nuzman <dorit.nuzman@intel.com> 2018-10-14 08:50:06 +0000
commit: 38bbf81ade2fa94018db2f0777fa19de71c78f41 (patch)
tree: 6e26fb98d4d69d913a700cd79ea468b9d0f59194 /llvm/include
parent: 74587a0e489dec97780404f064b7768c7ab6df22 (diff)
download: bcm5719-llvm-38bbf81ade2fa94018db2f0777fa19de71c78f41.tar.gz
bcm5719-llvm-38bbf81ade2fa94018db2f0777fa19de71c78f41.zip
4 files changed, 72 insertions, 12 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 18b5a5cf0e5..c2a9d1ec195 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -587,6 +587,10 @@ public:
   /// Enable matching of interleaved access groups.
   bool enableInterleavedAccessVectorization() const;
 
+  /// Enable matching of interleaved access groups that contain predicated 
+  /// accesses and are vectorized using masked vector loads/stores.
+  bool enableMaskedInterleavedAccessVectorization() const;
+
   /// Indicate that it is potentially unsafe to automatically vectorize
   /// floating-point operations because the semantics of vector and scalar
   /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
@@ -821,9 +825,11 @@ public:
   ///    load allows gaps)
   /// \p Alignment is the alignment of the memory operation
   /// \p AddressSpace is address space of the pointer.
+  /// \p IsMasked indicates if the memory access is predicated.
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                  ArrayRef<unsigned> Indices, unsigned Alignment,
-                                 unsigned AddressSpace) const;
+                                 unsigned AddressSpace, 
+                                 bool IsMasked = false) const;
 
   /// Calculate the cost of performing a vector reduction.
   ///
@@ -1072,6 +1078,7 @@ public:
   virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
       bool IsZeroCmp) const = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
+  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
   virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                               unsigned BitWidth,
@@ -1132,7 +1139,8 @@ public:
                                          unsigned Factor,
                                          ArrayRef<unsigned> Indices,
                                          unsigned Alignment,
-                                         unsigned AddressSpace) = 0;
+                                         unsigned AddressSpace,
+                                         bool IsMasked = false) = 0;
   virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                          bool IsPairwiseForm) = 0;
   virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
@@ -1346,6 +1354,9 @@ public:
   bool enableInterleavedAccessVectorization() override {
     return Impl.enableInterleavedAccessVectorization();
   }
+  bool enableMaskedInterleavedAccessVectorization() override {
+    return Impl.enableMaskedInterleavedAccessVectorization();
+  }
   bool isFPVectorizationPotentiallyUnsafe() override {
     return Impl.isFPVectorizationPotentiallyUnsafe();
   }
@@ -1471,9 +1482,9 @@ public:
   }
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                  ArrayRef<unsigned> Indices, unsigned Alignment,
-                                 unsigned AddressSpace) override {
+                                 unsigned AddressSpace, bool IsMasked) override {
     return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                           Alignment, AddressSpace);
+                                           Alignment, AddressSpace, IsMasked);
   }
   int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                  bool IsPairwiseForm) override {
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index e39fe66c0a4..c64d4d36805 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -313,6 +313,8 @@ public:
 
   bool enableInterleavedAccessVectorization() { return false; }
 
+  bool enableMaskedInterleavedAccessVectorization() { return false; }
+
   bool isFPVectorizationPotentiallyUnsafe() { return false; }
 
   bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
@@ -450,8 +452,8 @@ public:
   unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                       unsigned Factor,
                                       ArrayRef<unsigned> Indices,
-                                      unsigned Alignment,
-                                      unsigned AddressSpace) {
+                                      unsigned Alignment, unsigned AddressSpace,
+                                      bool IsMasked = false) {
     return 1;
   }
 
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 622d932f74f..2ac49f67662 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -125,6 +125,21 @@ computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
 /// This function always sets a (possibly null) value for each K in Kinds.
 Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
 
+/// Create a mask with replicated elements.
+///
+/// This function creates a shuffle mask for replicating each of the \p VF 
+/// elements in a vector \p ReplicationFactor times. It can be used to
+/// transform a mask of \p VF elements into a mask of
+/// \p VF * \p ReplicationFactor elements used by a predicated
+/// interleaved-group of loads/stores whose interleave factor equals
+/// \p ReplicationFactor.
+///
+/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
+///
+///   <0,0,0,1,1,1,2,2,2,3,3,3>
+Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor,
+                               unsigned VF);
+
 /// Create an interleave shuffle mask.
 ///
 /// This function creates a shuffle mask for interleaving \p NumVecs vectors of
@@ -328,7 +343,7 @@ public:
   InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                         DominatorTree *DT, LoopInfo *LI,
                         const LoopAccessInfo *LAI)
-    : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
+      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
 
   ~InterleavedAccessInfo() {
     SmallPtrSet<InterleaveGroup *, 4> DelSet;
@@ -341,7 +356,9 @@ public:
 
   /// Analyze the interleaved accesses and collect them in interleave
   /// groups. Substitute symbolic strides using \p Strides.
-  void analyzeInterleaving();
+  /// Consider also predicated loads/stores in the analysis if
+  /// \p EnableMaskedInterleavedGroup is true.
+  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
 
   /// Check if \p Instr belongs to any interleave group.
   bool isInterleaved(Instruction *Instr) const {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index b460cdc0ba1..e740fe57172 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -783,8 +783,8 @@ public:
   unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                       unsigned Factor,
                                       ArrayRef<unsigned> Indices,
-                                      unsigned Alignment,
-                                      unsigned AddressSpace) {
+                                      unsigned Alignment, unsigned AddressSpace,
+                                      bool IsMasked = false) {
     VectorType *VT = dyn_cast<VectorType>(VecTy);
     assert(VT && "Expect a vector type for interleaved memory op");
 
@@ -795,8 +795,13 @@ public:
     VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
 
     // Firstly, the cost of load/store operation.
-    unsigned Cost = static_cast<T *>(this)->getMemoryOpCost(
-        Opcode, VecTy, Alignment, AddressSpace);
+    unsigned Cost;
+    if (IsMasked)
+      Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
+          Opcode, VecTy, Alignment, AddressSpace);
+    else
+      Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
+                                                     AddressSpace);
 
     // Legalize the vector type, and get the legalized and unlegalized type
     // sizes.
@@ -892,6 +897,31 @@ public:
                     ->getVectorInstrCost(Instruction::InsertElement, VT, i);
     }
 
+    if (!IsMasked)
+      return Cost;
+
+    Type *I8Type = Type::getInt8Ty(VT->getContext());
+    VectorType *MaskVT = VectorType::get(I8Type, NumElts);
+    SubVT = VectorType::get(I8Type, NumSubElts);
+
+    // The mask shuffling cost is that of extracting all the elements of the
+    // mask and inserting each of them Factor times into the wide vector:
+    //
+    // E.g. an interleaved group with factor 3:
+    //    %mask = icmp ult <8 x i32> %vec1, %vec2
+    //    %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
+    //        <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
+    // The cost is estimated as extracting all mask elements from the <8xi1>
+    // mask vector and inserting them Factor times into the <24xi1> shuffled
+    // mask vector.
+    for (unsigned i = 0; i < NumSubElts; i++)
+      Cost += static_cast<T *>(this)->getVectorInstrCost(
+          Instruction::ExtractElement, SubVT, i);
+
+    for (unsigned i = 0; i < NumElts; i++)
+      Cost += static_cast<T *>(this)->getVectorInstrCost(
+          Instruction::InsertElement, MaskVT, i);
+
     return Cost;
   }
author	Dorit Nuzman <dorit.nuzman@intel.com>	2018-10-14 08:50:06 +0000
committer	Dorit Nuzman <dorit.nuzman@intel.com>	2018-10-14 08:50:06 +0000
commit	38bbf81ade2fa94018db2f0777fa19de71c78f41 (patch)
tree	6e26fb98d4d69d913a700cd79ea468b9d0f59194 /llvm/include
parent	74587a0e489dec97780404f064b7768c7ab6df22 (diff)
download	bcm5719-llvm-38bbf81ade2fa94018db2f0777fa19de71c78f41.tar.gz bcm5719-llvm-38bbf81ade2fa94018db2f0777fa19de71c78f41.zip