Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/TargetTransformInfo.cpp               |  10
-rw-r--r--  llvm/lib/Analysis/VectorUtils.cpp                       |  29
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp  |   7
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h    |   2
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp          |   8
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.h            |   2
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp  |   6
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h    |   2
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp      |   7
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h        |   3
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp  |   6
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h    |   2
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp          |  23
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.h            |   9
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp         | 126
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h         |   3
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h                   |   8
17 files changed, 195 insertions, 58 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 4ad48e351a4..867403d0ef1 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -268,6 +268,10 @@ bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
return TTIImpl->enableInterleavedAccessVectorization();
}
+bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
+ return TTIImpl->enableMaskedInterleavedAccessVectorization();
+}
+
bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}
@@ -515,9 +519,9 @@ int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
int TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace) const {
- int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ unsigned Alignment, unsigned AddressSpace, bool IsMasked) const {
+ int Cost = TTIImpl->getInterleavedMemoryOpCost(
+ Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, IsMasked);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
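
[Note] As a usage sketch only (not part of this patch): a caller such as the loop vectorizer now passes the extra flag when costing a predicated interleave group. WideVecTy, Indices, Alignment, AS and TTI stand for values the caller is assumed to have computed already:

    // Illustrative query of the extended cost interface.
    bool IsMasked = true; // the group resides in a predicated block
    int Cost = TTI.getInterleavedMemoryOpCost(Instruction::Load, WideVecTy,
                                              /*Factor=*/2, Indices, Alignment,
                                              AS, IsMasked);

The corresponding real call site in LoopVectorize.cpp appears further down in this diff.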
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 272c665ace1..e14449b8838 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -502,6 +502,16 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
return Inst;
}
+Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
+ unsigned ReplicationFactor, unsigned VF) {
+ SmallVector<Constant *, 16> MaskVec;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < ReplicationFactor; j++)
+ MaskVec.push_back(Builder.getInt32(i));
+
+ return ConstantVector::get(MaskVec);
+}
+
Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
unsigned NumVecs) {
SmallVector<Constant *, 16> Mask;
@@ -672,7 +682,8 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
// this group because it and (2) are dependent. However, (1) can be grouped
// with other accesses that may precede it in program order. Note that a
// bottom-up order does not imply that WAW dependences should not be checked.
-void InterleavedAccessInfo::analyzeInterleaving() {
+void InterleavedAccessInfo::analyzeInterleaving(
+ bool EnablePredicatedInterleavedMemAccesses) {
LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
const ValueToValueMap &Strides = LAI->getSymbolicStrides();
@@ -712,9 +723,8 @@ void InterleavedAccessInfo::analyzeInterleaving() {
// create a group for B, we continue with the bottom-up algorithm to ensure
// we don't break any of B's dependences.
InterleaveGroup *Group = nullptr;
- // TODO: Ignore B if it is in a predicated block. This restriction can be
- // relaxed in the future once we handle masked interleaved groups.
- if (isStrided(DesB.Stride) && !isPredicated(B->getParent())) {
+ if (isStrided(DesB.Stride) &&
+ (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
Group = getInterleaveGroup(B);
if (!Group) {
LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
@@ -808,11 +818,12 @@ void InterleavedAccessInfo::analyzeInterleaving() {
if (DistanceToB % static_cast<int64_t>(DesB.Size))
continue;
- // Ignore A if either A or B is in a predicated block. Although we
- // currently prevent group formation for predicated accesses, we may be
- // able to relax this limitation in the future once we handle more
- // complicated blocks.
- if (isPredicated(A->getParent()) || isPredicated(B->getParent()))
+ // All members of a predicated interleave-group must have the same predicate,
+ // and currently must reside in the same BB.
+ BasicBlock *BlockA = A->getParent();
+ BasicBlock *BlockB = B->getParent();
+ if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
+ (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
continue;
// The index of A is the index of B plus A's distance to B in multiples
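
[Note] To make the new createReplicatedMask helper above concrete, an illustrative use (not part of the patch) mirroring how the vectorizer widens a per-lane block mask into a per-element mask; Builder and BlockMask are assumed to be provided by the caller:

    // For ReplicationFactor = 3 and VF = 4, createReplicatedMask returns the
    // constant shuffle mask <0,0,0, 1,1,1, 2,2,2, 3,3,3>.
    Constant *RepMask = createReplicatedMask(Builder, /*ReplicationFactor=*/3,
                                             /*VF=*/4);
    // Shuffling a block mask <m0,m1,m2,m3> with it produces
    // <m0,m0,m0, m1,m1,m1, m2,m2,m2, m3,m3,m3>, the mask applied to the wide
    // vector of an interleave group with factor 3.
    Value *WideMask = Builder.CreateShuffleVector(
        BlockMask, UndefValue::get(BlockMask->getType()), RepMask,
        "interleaved.mask");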
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 96e751e8697..a16de89cf10 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -659,11 +659,12 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool IsMasked) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
- if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
+ if (!IsMasked && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
unsigned NumElts = VecTy->getVectorNumElements();
auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
@@ -676,7 +677,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace, IsMasked);
}
int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index c056a7d2428..b3893d32850 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -146,7 +146,7 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, bool IsMasked = false);
bool
shouldConsiderAddressTypePromotion(const Instruction &I,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 1b0d162f726..90e0cd96682 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -542,14 +542,16 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool IsMasked) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
// vldN/vstN doesn't support vector types of i64/f64 element.
bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
- if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
+ if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
+ !IsMasked) {
unsigned NumElts = VecTy->getVectorNumElements();
auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
@@ -562,7 +564,7 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace, IsMasked);
}
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 7d14bd7c256..84e3055c6bc 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -169,7 +169,7 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, bool IsMasked);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 4d0e7dc52e8..79b269bccfe 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -206,10 +206,10 @@ unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace) {
- if (Indices.size() != Factor)
+ unsigned Alignment, unsigned AddressSpace, bool IsMasked) {
+ if (Indices.size() != Factor || IsMasked)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace, IsMasked);
return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 2c03cd268ff..901a91692e8 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -123,7 +123,7 @@ public:
bool VariableMask, unsigned Alignment);
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, bool IsMasked);
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I);
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index b0da9b5a6d7..f67bacc87ec 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -473,7 +473,12 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool IsMasked) {
+ if (IsMasked)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace, IsMasked);
+
assert(isa<VectorType>(VecTy) &&
"Expect a vector type for interleaved memory op");
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 2ee2b3eb808..252d46e7a2a 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -90,7 +90,8 @@ public:
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool IsMasked = false);
/// @}
};
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 6f553d5bed3..1eaeb9699bf 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -909,7 +909,11 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool IsMasked) {
+ if (IsMasked)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace, IsMasked);
assert(isa<VectorType>(VecTy) &&
"Expect a vector type for interleaved memory op");
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index bfa942357c5..92b2b9bdcb8 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -92,7 +92,7 @@ public:
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, bool IsMasked = false);
/// @}
};
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index d3a75123935..82e4dfe25b7 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2723,7 +2723,12 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool IsMasked) {
+
+ if (IsMasked)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace, IsMasked);
// We currently Support only fully-interleaved groups, with no gaps.
// TODO: Support also strided loads (interleaved-groups with gaps).
@@ -2832,7 +2837,12 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool IsMasked) {
+
+ if (IsMasked)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace, IsMasked);
// VecTy for interleave memop is <VF*Factor x Elt>.
// So, for VF=4, Interleave Factor = 3, Element type = i32 we have
@@ -2950,7 +2960,8 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool IsMasked) {
auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) {
Type *EltTy = VecTy->getVectorElementType();
if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
@@ -2962,11 +2973,11 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
};
if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI()))
return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace, IsMasked);
if (ST->hasAVX2())
return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace, IsMasked);
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace, IsMasked);
}
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 3df89903882..2bd778a4211 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -101,13 +101,16 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace);
+ unsigned Alignment, unsigned AddressSpace,
+ bool IsMasked = false);
int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace);
+ unsigned Alignment, unsigned AddressSpace,
+ bool IsMasked = false);
int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace);
+ unsigned Alignment, unsigned AddressSpace,
+ bool IsMasked = false);
int getIntImmCost(int64_t);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7ebe8d102b7..e93cfb34156 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -172,6 +172,10 @@ static cl::opt<bool> EnableInterleavedMemAccesses(
"enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
+static cl::opt<bool> EnableMaskedInterleavedMemAccesses(
+ "enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden,
+ cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"));
+
/// We don't interleave loops with a known constant trip count below this
/// number.
static const unsigned TinyTripCountInterleaveThreshold = 128;
@@ -408,8 +412,10 @@ public:
/// Construct the vector value of a scalarized value \p V one lane at a time.
void packScalarIntoVectorValue(Value *V, const VPIteration &Instance);
- /// Try to vectorize the interleaved access group that \p Instr belongs to.
- void vectorizeInterleaveGroup(Instruction *Instr);
+ /// Try to vectorize the interleaved access group that \p Instr belongs to,
+ /// optionally masking the vector operations if \p BlockInMask is non-null.
+ void vectorizeInterleaveGroup(Instruction *Instr,
+ VectorParts *BlockInMask = nullptr);
/// Vectorize Load and Store instructions, optionally masking the vector
/// operations if \p BlockInMask is non-null.
@@ -1112,6 +1118,11 @@ public:
/// access that can be widened.
bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1);
+ /// Returns true if \p I is a memory instruction in an interleaved-group
+ /// of memory accesses that can be vectorized with wide vector loads/stores
+ /// and shuffles.
+ bool interleavedAccessCanBeWidened(Instruction *I, unsigned VF = 1);
+
/// Check if \p Instr belongs to any interleaved access group.
bool isAccessInterleaved(Instruction *Instr) {
return InterleaveInfo.isInterleaved(Instr);
@@ -1946,7 +1957,8 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
-void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
+void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
+ VectorParts *BlockInMask) {
const InterleaveGroup *Group = Cost->getInterleavedAccessGroup(Instr);
assert(Group && "Fail to get an interleaved access group.");
@@ -1968,6 +1980,15 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
SmallVector<Value *, 2> NewPtrs;
unsigned Index = Group->getIndex(Instr);
+ VectorParts Mask;
+ bool IsMaskRequired = BlockInMask;
+ if (IsMaskRequired) {
+ Mask = *BlockInMask;
+ // TODO: extend the masked interleaved-group support to reversed access.
+ assert(!Group->isReverse() && "Reversed masked interleave-group "
+ "not supported.");
+ }
+
// If the group is reverse, adjust the index to refer to the last vector lane
// instead of the first. We adjust the index from the first vector lane,
// rather than directly getting the pointer for lane VF - 1, because the
@@ -2011,8 +2032,19 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
// For each unroll part, create a wide load for the group.
SmallVector<Value *, 2> NewLoads;
for (unsigned Part = 0; Part < UF; Part++) {
- auto *NewLoad = Builder.CreateAlignedLoad(
- NewPtrs[Part], Group->getAlignment(), "wide.vec");
+ Instruction *NewLoad;
+ if (IsMaskRequired) {
+ auto *Undefs = UndefValue::get(Mask[Part]->getType());
+ auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
+ Value *ShuffledMask = Builder.CreateShuffleVector(
+ Mask[Part], Undefs, RepMask, "interleaved.mask");
+ NewLoad = Builder.CreateMaskedLoad(NewPtrs[Part], Group->getAlignment(),
+ ShuffledMask, UndefVec,
+ "wide.masked.vec");
+ }
+ else
+ NewLoad = Builder.CreateAlignedLoad(NewPtrs[Part],
+ Group->getAlignment(), "wide.vec");
Group->addMetadata(NewLoad);
NewLoads.push_back(NewLoad);
}
@@ -2079,8 +2111,18 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
Value *IVec = Builder.CreateShuffleVector(WideVec, UndefVec, IMask,
"interleaved.vec");
- Instruction *NewStoreInstr =
- Builder.CreateAlignedStore(IVec, NewPtrs[Part], Group->getAlignment());
+ Instruction *NewStoreInstr;
+ if (IsMaskRequired) {
+ auto *Undefs = UndefValue::get(Mask[Part]->getType());
+ auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
+ Value *ShuffledMask = Builder.CreateShuffleVector(
+ Mask[Part], Undefs, RepMask, "interleaved.mask");
+ NewStoreInstr = Builder.CreateMaskedStore(
+ IVec, NewPtrs[Part], Group->getAlignment(), ShuffledMask);
+ }
+ else
+ NewStoreInstr = Builder.CreateAlignedStore(IVec, NewPtrs[Part],
+ Group->getAlignment());
Group->addMetadata(NewStoreInstr);
}
@@ -4253,6 +4295,32 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne
return false;
}
+static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
+ if (!(EnableMaskedInterleavedMemAccesses.getNumOccurrences() > 0))
+ return TTI.enableMaskedInterleavedAccessVectorization();
+
+ // If an override option has been passed in for interleaved accesses, use it.
+ return EnableMaskedInterleavedMemAccesses;
+}
+
+bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
+ unsigned VF) {
+ assert(isAccessInterleaved(I) && "Expecting interleaved access.");
+ assert(getWideningDecision(I, VF) == CM_Unknown &&
+ "Decision should not be set yet.");
+
+ if (!Legal->blockNeedsPredication(I->getParent()) ||
+ !Legal->isMaskRequired(I))
+ return true;
+
+ if (!useMaskedInterleavedAccesses(TTI))
+ return false;
+
+ auto *Ty = getMemInstValueType(I);
+ return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty)
+ : TTI.isLegalMaskedStore(Ty);
+}
+
bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
unsigned VF) {
// Get and ensure we have a valid memory instruction.
@@ -5371,13 +5439,17 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
}
// Calculate the cost of the whole interleaved group.
- unsigned Cost = TTI.getInterleavedMemoryOpCost(I->getOpcode(), WideVecTy,
- Group->getFactor(), Indices,
- Group->getAlignment(), AS);
-
- if (Group->isReverse())
+ unsigned Cost = TTI.getInterleavedMemoryOpCost(
+ I->getOpcode(), WideVecTy, Group->getFactor(), Indices,
+ Group->getAlignment(), AS, Legal->isMaskRequired(I));
+
+ if (Group->isReverse()) {
+ // TODO: Add support for reversed masked interleaved access.
+ assert(!Legal->isMaskRequired(I) &&
+ "Reverse masked interleaved access not supported.");
Cost += Group->getNumMembers() *
TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
+ }
return Cost;
}
@@ -5479,7 +5551,8 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
continue;
NumAccesses = Group->getNumMembers();
- InterleaveCost = getInterleaveGroupCost(&I, VF);
+ if (interleavedAccessCanBeWidened(&I, VF))
+ InterleaveCost = getInterleaveGroupCost(&I, VF);
}
unsigned GatherScatterCost =
@@ -6152,7 +6225,8 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
}
VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
- VFRange &Range) {
+ VFRange &Range,
+ VPlanPtr &Plan) {
const InterleaveGroup *IG = CM.getInterleavedAccessGroup(I);
if (!IG)
return nullptr;
@@ -6174,7 +6248,11 @@ VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
assert(I == IG->getInsertPos() &&
"Generating a recipe for an adjunct member of an interleave group");
- return new VPInterleaveRecipe(IG);
+ VPValue *Mask = nullptr;
+ if (Legal->isMaskRequired(I))
+ Mask = createBlockInMask(I->getParent(), Plan);
+
+ return new VPInterleaveRecipe(IG, Mask);
}
VPWidenMemoryInstructionRecipe *
@@ -6442,7 +6520,7 @@ bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
VPRecipeBase *Recipe = nullptr;
// Check if Instr should belong to an interleave memory recipe, or already
// does. In the latter case Instr is irrelevant.
- if ((Recipe = tryToInterleaveMemory(Instr, Range))) {
+ if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) {
VPBB->appendRecipe(Recipe);
return true;
}
@@ -6669,6 +6747,10 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const {
O << " +\n"
<< Indent << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
IG->getInsertPos()->printAsOperand(O, false);
+ if (User) {
+ O << ", ";
+ User->getOperand(0)->printAsOperand(O);
+ }
O << "\\l\"";
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (Instruction *I = IG->getMember(i))
@@ -6731,7 +6813,15 @@ void VPBlendRecipe::execute(VPTransformState &State) {
void VPInterleaveRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Interleave group being replicated.");
- State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
+ if (!User)
+ return State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
+
+ // Last (and currently only) operand is a mask.
+ InnerLoopVectorizer::VectorParts MaskValues(State.UF);
+ VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ MaskValues[Part] = State.get(Mask, Part);
+ State.ILV->vectorizeInterleaveGroup(IG->getInsertPos(), &MaskValues);
}
void VPReplicateRecipe::execute(VPTransformState &State) {
@@ -7030,7 +7120,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Analyze interleaved memory accesses.
if (UseInterleaved) {
- IAI.analyzeInterleaving();
+ IAI.analyzeInterleaving(useMaskedInterleavedAccesses(*TTI));
}
// Use the cost model.
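
[Note] For orientation only (not from the patch), the kind of source loop the LoopVectorize.cpp changes above target; function and variable names are made up. The two conditional, stride-2 accesses form an interleave group inside a predicated block, which can now be kept as a group and emitted as a wide masked load/store plus shuffles when the target opts in:

    void scale_pairs(int *Pairs, const int *Cond, int N) {
      for (int i = 0; i < N; ++i)
        if (Cond[i]) {              // predicated block
          Pairs[2 * i]     *= 2;    // interleave-group member 0
          Pairs[2 * i + 1] *= 3;    // interleave-group member 1
        }
    }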
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index f43a8bb123b..15d38ac9c84 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -69,7 +69,8 @@ public:
/// \return value is <true, nullptr>, as it is handled by another recipe.
/// \p Range.End may be decreased to ensure same decision from \p Range.Start
/// to \p Range.End.
- VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range);
+ VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range,
+ VPlanPtr &Plan);
/// Check if \I is a memory instruction to be widened for \p Range.Start and
/// potentially masked. Such instructions are handled by a recipe that takes
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c3123b41600..81b1986c97d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -769,10 +769,14 @@ public:
class VPInterleaveRecipe : public VPRecipeBase {
private:
const InterleaveGroup *IG;
+ std::unique_ptr<VPUser> User;
public:
- VPInterleaveRecipe(const InterleaveGroup *IG)
- : VPRecipeBase(VPInterleaveSC), IG(IG) {}
+ VPInterleaveRecipe(const InterleaveGroup *IG, VPValue *Mask)
+ : VPRecipeBase(VPInterleaveSC), IG(IG) {
+ if (Mask) // Create a VPInstruction to register as a user of the mask.
+ User.reset(new VPUser({Mask}));
+ }
~VPInterleaveRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.