diff options
| author | Dorit Nuzman <dorit.nuzman@intel.com> | 2018-10-14 08:50:06 +0000 |
|---|---|---|
| committer | Dorit Nuzman <dorit.nuzman@intel.com> | 2018-10-14 08:50:06 +0000 |
| commit | 38bbf81ade2fa94018db2f0777fa19de71c78f41 (patch) | |
| tree | 6e26fb98d4d69d913a700cd79ea468b9d0f59194 /llvm/include | |
| parent | 74587a0e489dec97780404f064b7768c7ab6df22 (diff) | |
| download | bcm5719-llvm-38bbf81ade2fa94018db2f0777fa19de71c78f41.tar.gz bcm5719-llvm-38bbf81ade2fa94018db2f0777fa19de71c78f41.zip | |
Recommit r344472 after fixing build failure on ARM and PPC.
llvm-svn: 344475
Diffstat (limited to 'llvm/include')
| -rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfo.h | 19 | ||||
| -rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 6 | ||||
| -rw-r--r-- | llvm/include/llvm/Analysis/VectorUtils.h | 21 | ||||
| -rw-r--r-- | llvm/include/llvm/CodeGen/BasicTTIImpl.h | 38 |
4 files changed, 72 insertions, 12 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 18b5a5cf0e5..c2a9d1ec195 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -587,6 +587,10 @@ public: /// Enable matching of interleaved access groups. bool enableInterleavedAccessVectorization() const; + /// Enable matching of interleaved access groups that contain predicated + /// accesses and are vectorized using masked vector loads/stores. + bool enableMaskedInterleavedAccessVectorization() const; + /// Indicate that it is potentially unsafe to automatically vectorize /// floating-point operations because the semantics of vector and scalar /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math @@ -821,9 +825,11 @@ public: /// load allows gaps) /// \p Alignment is the alignment of the memory operation /// \p AddressSpace is address space of the pointer. + /// \p IsMasked indicates if the memory access is predicated. int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment, - unsigned AddressSpace) const; + unsigned AddressSpace, + bool IsMasked = false) const; /// Calculate the cost of performing a vector reduction. 
/// @@ -1072,6 +1078,7 @@ public: virtual const MemCmpExpansionOptions *enableMemCmpExpansion( bool IsZeroCmp) const = 0; virtual bool enableInterleavedAccessVectorization() = 0; + virtual bool enableMaskedInterleavedAccessVectorization() = 0; virtual bool isFPVectorizationPotentiallyUnsafe() = 0; virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, @@ -1132,7 +1139,8 @@ public: unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment, - unsigned AddressSpace) = 0; + unsigned AddressSpace, + bool IsMasked = false) = 0; virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) = 0; virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy, @@ -1346,6 +1354,9 @@ public: bool enableInterleavedAccessVectorization() override { return Impl.enableInterleavedAccessVectorization(); } + bool enableMaskedInterleavedAccessVectorization() override { + return Impl.enableMaskedInterleavedAccessVectorization(); + } bool isFPVectorizationPotentiallyUnsafe() override { return Impl.isFPVectorizationPotentiallyUnsafe(); } @@ -1471,9 +1482,9 @@ public: } int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment, - unsigned AddressSpace) override { + unsigned AddressSpace, bool IsMasked) override { return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace); + Alignment, AddressSpace, IsMasked); } int getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index e39fe66c0a4..c64d4d36805 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -313,6 +313,8 @@ public: bool enableInterleavedAccessVectorization() { return false; } + bool enableMaskedInterleavedAccessVectorization() { return false; } + 
bool isFPVectorizationPotentiallyUnsafe() { return false; } bool allowsMisalignedMemoryAccesses(LLVMContext &Context, @@ -450,8 +452,8 @@ public: unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, - unsigned Alignment, - unsigned AddressSpace) { + unsigned Alignment, unsigned AddressSpace, + bool IsMasked = false) { return 1; } diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 622d932f74f..2ac49f67662 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -125,6 +125,21 @@ computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks, /// This function always sets a (possibly null) value for each K in Kinds. Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL); +/// Create a mask with replicated elements. +/// +/// This function creates a shuffle mask for replicating each of the \p VF +/// elements in a vector \p ReplicationFactor times. It can be used to +/// transform a mask of \p VF elements into a mask of +/// \p VF * \p ReplicationFactor elements used by a predicated +/// interleaved-group of loads/stores whose Interleaved-factor == +/// \p ReplicationFactor. +/// +/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: +/// +/// <0,0,0,1,1,1,2,2,2,3,3,3> +Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor, + unsigned VF); + /// Create an interleave shuffle mask. 
/// /// This function creates a shuffle mask for interleaving \p NumVecs vectors of @@ -328,7 +343,7 @@ public: InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, DominatorTree *DT, LoopInfo *LI, const LoopAccessInfo *LAI) - : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {} + : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {} ~InterleavedAccessInfo() { SmallPtrSet<InterleaveGroup *, 4> DelSet; @@ -341,7 +356,9 @@ public: /// Analyze the interleaved accesses and collect them in interleave /// groups. Substitute symbolic strides using \p Strides. - void analyzeInterleaving(); + /// Consider also predicated loads/stores in the analysis if + /// \p EnableMaskedInterleavedGroup is true. + void analyzeInterleaving(bool EnableMaskedInterleavedGroup); /// Check if \p Instr belongs to any interleave group. bool isInterleaved(Instruction *Instr) const { diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index b460cdc0ba1..e740fe57172 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -783,8 +783,8 @@ public: unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, - unsigned Alignment, - unsigned AddressSpace) { + unsigned Alignment, unsigned AddressSpace, + bool IsMasked = false) { VectorType *VT = dyn_cast<VectorType>(VecTy); assert(VT && "Expect a vector type for interleaved memory op"); @@ -795,8 +795,13 @@ public: VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); // Firstly, the cost of load/store operation. 
- unsigned Cost = static_cast<T *>(this)->getMemoryOpCost( - Opcode, VecTy, Alignment, AddressSpace); + unsigned Cost; + if (IsMasked) + Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( + Opcode, VecTy, Alignment, AddressSpace); + else + Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment, + AddressSpace); // Legalize the vector type, and get the legalized and unlegalized type // sizes. @@ -892,6 +897,31 @@ public: ->getVectorInstrCost(Instruction::InsertElement, VT, i); } + if (!IsMasked) + return Cost; + + Type *I8Type = Type::getInt8Ty(VT->getContext()); + VectorType *MaskVT = VectorType::get(I8Type, NumElts); + SubVT = VectorType::get(I8Type, NumSubElts); + + // The Mask shuffling cost is extract all the elements of the Mask + // and insert each of them Factor times into the wide vector: + // + // E.g. an interleaved group with factor 3: + // %mask = icmp ult <8 x i32> %vec1, %vec2 + // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, + // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> + // The cost is estimated as extract all mask elements from the <8xi1> mask + // vector and insert them factor times into the <24xi1> shuffled mask + // vector. + for (unsigned i = 0; i < NumSubElts; i++) + Cost += static_cast<T *>(this)->getVectorInstrCost( + Instruction::ExtractElement, SubVT, i); + + for (unsigned i = 0; i < NumElts; i++) + Cost += static_cast<T *>(this)->getVectorInstrCost( + Instruction::InsertElement, MaskVT, i); + return Cost; } |

