summary refs log tree commit diff stats
path: root/llvm/include
diff options
context:
space:
mode:
author Dorit Nuzman <dorit.nuzman@intel.com> 2018-10-14 08:50:06 +0000
committer Dorit Nuzman <dorit.nuzman@intel.com> 2018-10-14 08:50:06 +0000
commit 38bbf81ade2fa94018db2f0777fa19de71c78f41 (patch)
tree 6e26fb98d4d69d913a700cd79ea468b9d0f59194 /llvm/include
parent 74587a0e489dec97780404f064b7768c7ab6df22 (diff)
download bcm5719-llvm-38bbf81ade2fa94018db2f0777fa19de71c78f41.tar.gz
bcm5719-llvm-38bbf81ade2fa94018db2f0777fa19de71c78f41.zip
recommit 344472 after fixing build failure on ARM and PPC.
llvm-svn: 344475
Diffstat (limited to 'llvm/include')
-rw-r--r-- llvm/include/llvm/Analysis/TargetTransformInfo.h 19
-rw-r--r-- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h 6
-rw-r--r-- llvm/include/llvm/Analysis/VectorUtils.h 21
-rw-r--r-- llvm/include/llvm/CodeGen/BasicTTIImpl.h 38
4 files changed, 72 insertions, 12 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 18b5a5cf0e5..c2a9d1ec195 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -587,6 +587,10 @@ public:
/// Enable matching of interleaved access groups.
bool enableInterleavedAccessVectorization() const;
+ /// Enable matching of interleaved access groups that contain predicated
+ /// accesses and are vectorized using masked vector loads/stores.
+ bool enableMaskedInterleavedAccessVectorization() const;
+
/// Indicate that it is potentially unsafe to automatically vectorize
/// floating-point operations because the semantics of vector and scalar
/// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
@@ -821,9 +825,11 @@ public:
/// load allows gaps)
/// \p Alignment is the alignment of the memory operation
/// \p AddressSpace is address space of the pointer.
+ /// \p IsMasked indicates if the memory access is predicated.
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace) const;
+ unsigned AddressSpace,
+ bool IsMasked = false) const;
/// Calculate the cost of performing a vector reduction.
///
@@ -1072,6 +1078,7 @@ public:
virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
bool IsZeroCmp) const = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
+ virtual bool enableMaskedInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
@@ -1132,7 +1139,8 @@ public:
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) = 0;
+ unsigned AddressSpace,
+ bool IsMasked = false) = 0;
virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) = 0;
virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
@@ -1346,6 +1354,9 @@ public:
bool enableInterleavedAccessVectorization() override {
return Impl.enableInterleavedAccessVectorization();
}
+ bool enableMaskedInterleavedAccessVectorization() override {
+ return Impl.enableMaskedInterleavedAccessVectorization();
+ }
bool isFPVectorizationPotentiallyUnsafe() override {
return Impl.isFPVectorizationPotentiallyUnsafe();
}
@@ -1471,9 +1482,9 @@ public:
}
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace) override {
+ unsigned AddressSpace, bool IsMasked) override {
return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace, IsMasked);
}
int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) override {
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index e39fe66c0a4..c64d4d36805 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -313,6 +313,8 @@ public:
bool enableInterleavedAccessVectorization() { return false; }
+ bool enableMaskedInterleavedAccessVectorization() { return false; }
+
bool isFPVectorizationPotentiallyUnsafe() { return false; }
bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
@@ -450,8 +452,8 @@ public:
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned Alignment, unsigned AddressSpace,
+ bool IsMasked = false) {
return 1;
}
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 622d932f74f..2ac49f67662 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -125,6 +125,21 @@ computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
+/// Create a mask with replicated elements.
+///
+/// This function creates a shuffle mask for replicating each of the \p VF
+/// elements in a vector \p ReplicationFactor times. It can be used to
+/// transform a mask of \p VF elements into a mask of
+/// \p VF * \p ReplicationFactor elements used by a predicated
+/// interleaved-group of loads/stores whose Interleaved-factor ==
+/// \p ReplicationFactor.
+///
+/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
+///
+/// <0,0,0,1,1,1,2,2,2,3,3,3>
+Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor,
+ unsigned VF);
+
/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
@@ -328,7 +343,7 @@ public:
InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
DominatorTree *DT, LoopInfo *LI,
const LoopAccessInfo *LAI)
- : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
+ : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
~InterleavedAccessInfo() {
SmallPtrSet<InterleaveGroup *, 4> DelSet;
@@ -341,7 +356,9 @@ public:
/// Analyze the interleaved accesses and collect them in interleave
/// groups. Substitute symbolic strides using \p Strides.
- void analyzeInterleaving();
+ /// Consider also predicated loads/stores in the analysis if
+ /// \p EnableMaskedInterleavedGroup is true.
+ void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
/// Check if \p Instr belongs to any interleave group.
bool isInterleaved(Instruction *Instr) const {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index b460cdc0ba1..e740fe57172 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -783,8 +783,8 @@ public:
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned Alignment, unsigned AddressSpace,
+ bool IsMasked = false) {
VectorType *VT = dyn_cast<VectorType>(VecTy);
assert(VT && "Expect a vector type for interleaved memory op");
@@ -795,8 +795,13 @@ public:
VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
// Firstly, the cost of load/store operation.
- unsigned Cost = static_cast<T *>(this)->getMemoryOpCost(
- Opcode, VecTy, Alignment, AddressSpace);
+ unsigned Cost;
+ if (IsMasked)
+ Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
+ Opcode, VecTy, Alignment, AddressSpace);
+ else
+ Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
+ AddressSpace);
// Legalize the vector type, and get the legalized and unlegalized type
// sizes.
@@ -892,6 +897,31 @@ public:
->getVectorInstrCost(Instruction::InsertElement, VT, i);
}
+ if (!IsMasked)
+ return Cost;
+
+ Type *I8Type = Type::getInt8Ty(VT->getContext());
+ VectorType *MaskVT = VectorType::get(I8Type, NumElts);
+ SubVT = VectorType::get(I8Type, NumSubElts);
+
+ // The Mask shuffling cost is extract all the elements of the Mask
+ // and insert each of them Factor times into the wide vector:
+ //
+ // E.g. an interleaved group with factor 3:
+ // %mask = icmp ult <8 x i32> %vec1, %vec2
+ // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
+ // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
+ // The cost is estimated as extract all mask elements from the <8xi1> mask
+ // vector and insert them factor times into the <24xi1> shuffled mask
+ // vector.
+ for (unsigned i = 0; i < NumSubElts; i++)
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::ExtractElement, SubVT, i);
+
+ for (unsigned i = 0; i < NumElts; i++)
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::InsertElement, MaskVT, i);
+
return Cost;
}
OpenPOWER on IntegriCloud