Diffstat (limited to 'llvm/lib')
17 files changed, 58 insertions, 195 deletions
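Every file below follows the same pattern: the trailing IsMasked flag is dropped from getInterleavedMemoryOpCost() and the code paths keyed off it are removed. A minimal sketch of a caller against the post-patch interface — WideVecTy, Indices, and the constant arguments here are illustrative placeholders, not values from the patch:

  // Cost of a factor-2 interleaved load group; note there is no longer a
  // trailing IsMasked argument on the hook.
  SmallVector<unsigned, 2> Indices = {0, 1}; // group members actually used
  int Cost = TTI.getInterleavedMemoryOpCost(Instruction::Load, WideVecTy,
                                            /*Factor=*/2, Indices,
                                            /*Alignment=*/16,
                                            /*AddressSpace=*/0);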
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 867403d0ef1..4ad48e351a4 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -268,10 +268,6 @@ bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
   return TTIImpl->enableInterleavedAccessVectorization();
 }
 
-bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
-  return TTIImpl->enableMaskedInterleavedAccessVectorization();
-}
-
 bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
   return TTIImpl->isFPVectorizationPotentiallyUnsafe();
 }
@@ -519,9 +515,9 @@ int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
 
 int TargetTransformInfo::getInterleavedMemoryOpCost(
     unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
-    unsigned Alignment, unsigned AddressSpace, bool IsMasked) const {
-  int Cost = TTIImpl->getInterleavedMemoryOpCost(
-      Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, IsMasked);
+    unsigned Alignment, unsigned AddressSpace) const {
+  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+                                                 Alignment, AddressSpace);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index e14449b8838..272c665ace1 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -502,16 +502,6 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
   return Inst;
 }
 
-Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
-                                     unsigned ReplicationFactor, unsigned VF) {
-  SmallVector<Constant *, 16> MaskVec;
-  for (unsigned i = 0; i < VF; i++)
-    for (unsigned j = 0; j < ReplicationFactor; j++)
-      MaskVec.push_back(Builder.getInt32(i));
-
-  return ConstantVector::get(MaskVec);
-}
-
 Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                                      unsigned NumVecs) {
   SmallVector<Constant *, 16> Mask;
@@ -682,8 +672,7 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
 // this group because it and (2) are dependent. However, (1) can be grouped
 // with other accesses that may precede it in program order. Note that a
 // bottom-up order does not imply that WAW dependences should not be checked.
-void InterleavedAccessInfo::analyzeInterleaving(
-    bool EnablePredicatedInterleavedMemAccesses) {
+void InterleavedAccessInfo::analyzeInterleaving() {
   LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
 
   const ValueToValueMap &Strides = LAI->getSymbolicStrides();
@@ -723,8 +712,9 @@ void InterleavedAccessInfo::analyzeInterleaving(
     // create a group for B, we continue with the bottom-up algorithm to ensure
     // we don't break any of B's dependences.
     InterleaveGroup *Group = nullptr;
-    if (isStrided(DesB.Stride) &&
-        (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
+    // TODO: Ignore B if it is in a predicated block. This restriction can be
+    // relaxed in the future once we handle masked interleaved groups.
+    if (isStrided(DesB.Stride) && !isPredicated(B->getParent())) {
       Group = getInterleaveGroup(B);
       if (!Group) {
         LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
@@ -818,12 +808,11 @@ void InterleavedAccessInfo::analyzeInterleaving(
       if (DistanceToB % static_cast<int64_t>(DesB.Size))
         continue;
 
-      // All members of a predicated interleave-group must have the same predicate,
-      // and currently must reside in the same BB.
-      BasicBlock *BlockA = A->getParent();
-      BasicBlock *BlockB = B->getParent();
-      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
-          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
+      // Ignore A if either A or B is in a predicated block. Although we
+      // currently prevent group formation for predicated accesses, we may be
+      // able to relax this limitation in the future once we handle more
+      // complicated blocks.
+      if (isPredicated(A->getParent()) || isPredicated(B->getParent()))
         continue;
 
       // The index of A is the index of B plus A's distance to B in multiples
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a16de89cf10..96e751e8697 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -659,12 +659,11 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                                unsigned Factor,
                                                ArrayRef<unsigned> Indices,
                                                unsigned Alignment,
-                                               unsigned AddressSpace,
-                                               bool IsMasked) {
+                                               unsigned AddressSpace) {
   assert(Factor >= 2 && "Invalid interleave factor");
   assert(isa<VectorType>(VecTy) && "Expect a vector type");
 
-  if (!IsMasked && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
+  if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
     unsigned NumElts = VecTy->getVectorNumElements();
     auto *SubVecTy =
         VectorType::get(VecTy->getScalarType(), NumElts / Factor);
@@ -677,7 +676,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   }
 
   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                           Alignment, AddressSpace, IsMasked);
+                                           Alignment, AddressSpace);
 }
 
 int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index b3893d32850..c056a7d2428 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -146,7 +146,7 @@ public:
 
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                  ArrayRef<unsigned> Indices, unsigned Alignment,
-                                 unsigned AddressSpace, bool IsMasked = false);
+                                 unsigned AddressSpace);
 
   bool shouldConsiderAddressTypePromotion(const Instruction &I,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index bac3e6c2387..1b0d162f726 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -542,16 +542,14 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                            unsigned Factor,
                                            ArrayRef<unsigned> Indices,
                                            unsigned Alignment,
-                                           unsigned AddressSpace,
-                                           bool IsMasked) {
+                                           unsigned AddressSpace) {
   assert(Factor >= 2 && "Invalid interleave factor");
   assert(isa<VectorType>(VecTy) && "Expect a vector type");
 
   // vldN/vstN doesn't support vector types of i64/f64 element.
   bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
 
-  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
-      !IsMasked) {
+  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
     unsigned NumElts = VecTy->getVectorNumElements();
     auto *SubVecTy =
         VectorType::get(VecTy->getScalarType(), NumElts / Factor);
@@ -564,7 +562,7 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   }
 
   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                           Alignment, AddressSpace, IsMasked);
+                                           Alignment, AddressSpace);
 }
 
 void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 84e3055c6bc..7d14bd7c256 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -169,7 +169,7 @@ public:
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                  ArrayRef<unsigned> Indices, unsigned Alignment,
-                                 unsigned AddressSpace, bool IsMasked);
+                                 unsigned AddressSpace);
 
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 79b269bccfe..4d0e7dc52e8 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -206,10 +206,10 @@ unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
 
 unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
       Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
-      unsigned Alignment, unsigned AddressSpace, bool IsMasked) {
-  if (Indices.size() != Factor || IsMasked)
+      unsigned Alignment, unsigned AddressSpace) {
+  if (Indices.size() != Factor)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace, IsMasked);
+                                             Alignment, AddressSpace);
   return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
 }
 
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 901a91692e8..2c03cd268ff 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -123,7 +123,7 @@ public:
           bool VariableMask, unsigned Alignment);
   unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
       unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment,
-      unsigned AddressSpace, bool IsMasked);
+      unsigned AddressSpace);
   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
       const Instruction *I);
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 2c81661cb17..b0da9b5a6d7 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -473,12 +473,7 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                            unsigned Factor,
                                            ArrayRef<unsigned> Indices,
                                            unsigned Alignment,
-                                           unsigned AddressSpace,
-                                           bool IsMasked) {
-  if (IsMasked)
-    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace, IsMasked);
-
+                                           unsigned AddressSpace) {
   assert(isa<VectorType>(VecTy) &&
          "Expect a vector type for interleaved memory op");
 
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 252d46e7a2a..2ee2b3eb808 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -90,8 +90,7 @@ public:
                                  unsigned Factor,
                                  ArrayRef<unsigned> Indices,
                                  unsigned Alignment,
-                                 unsigned AddressSpace,
-                                 bool IsMasked = false);
+                                 unsigned AddressSpace);
 
   /// @}
 };
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 1eaeb9699bf..6f553d5bed3 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -909,11 +909,7 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                                unsigned Factor,
                                                ArrayRef<unsigned> Indices,
                                                unsigned Alignment,
-                                               unsigned AddressSpace,
-                                               bool IsMasked) {
-  if (IsMasked)
-    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace, IsMasked);
+                                               unsigned AddressSpace) {
   assert(isa<VectorType>(VecTy) &&
          "Expect a vector type for interleaved memory op");
 
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 92b2b9bdcb8..bfa942357c5 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -92,7 +92,7 @@ public:
                                  unsigned Factor,
                                  ArrayRef<unsigned> Indices,
                                  unsigned Alignment,
-                                 unsigned AddressSpace, bool IsMasked = false);
+                                 unsigned AddressSpace);
 
   /// @}
 };
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 82e4dfe25b7..d3a75123935 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2723,12 +2723,7 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
                                                unsigned Factor,
                                                ArrayRef<unsigned> Indices,
                                                unsigned Alignment,
-                                               unsigned AddressSpace,
-                                               bool IsMasked) {
-
-  if (IsMasked)
-    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace, IsMasked);
+                                               unsigned AddressSpace) {
 
   // We currently Support only fully-interleaved groups, with no gaps.
   // TODO: Support also strided loads (interleaved-groups with gaps).
@@ -2837,12 +2832,7 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
                                                  unsigned Factor,
                                                  ArrayRef<unsigned> Indices,
                                                  unsigned Alignment,
-                                                 unsigned AddressSpace,
-                                                 bool IsMasked) {
-
-  if (IsMasked)
-    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                             Alignment, AddressSpace, IsMasked);
+                                                 unsigned AddressSpace) {
 
   // VecTy for interleave memop is <VF*Factor x Elt>.
   // So, for VF=4, Interleave Factor = 3, Element type = i32 we have
@@ -2960,8 +2950,7 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                            unsigned Factor,
                                            ArrayRef<unsigned> Indices,
                                            unsigned Alignment,
-                                           unsigned AddressSpace,
-                                           bool IsMasked) {
+                                           unsigned AddressSpace) {
   auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) {
     Type *EltTy = VecTy->getVectorElementType();
     if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
@@ -2973,11 +2962,11 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   };
   if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI()))
     return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices,
-                                            Alignment, AddressSpace, IsMasked);
+                                            Alignment, AddressSpace);
 
   if (ST->hasAVX2())
     return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices,
-                                          Alignment, AddressSpace, IsMasked);
+                                          Alignment, AddressSpace);
 
   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                           Alignment, AddressSpace, IsMasked);
+                                           Alignment, AddressSpace);
 }
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 2bd778a4211..3df89903882 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -101,16 +101,13 @@ public:
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                  unsigned Factor, ArrayRef<unsigned> Indices,
-                                 unsigned Alignment, unsigned AddressSpace,
-                                 bool IsMasked = false);
+                                 unsigned Alignment, unsigned AddressSpace);
   int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
                                        unsigned Factor, ArrayRef<unsigned> Indices,
-                                       unsigned Alignment, unsigned AddressSpace,
-                                       bool IsMasked = false);
+                                       unsigned Alignment, unsigned AddressSpace);
   int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
                                      unsigned Factor, ArrayRef<unsigned> Indices,
-                                     unsigned Alignment, unsigned AddressSpace,
-                                     bool IsMasked = false);
+                                     unsigned Alignment, unsigned AddressSpace);
 
   int getIntImmCost(int64_t);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e93cfb34156..7ebe8d102b7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -172,10 +172,6 @@ static cl::opt<bool> EnableInterleavedMemAccesses(
     "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
     cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
 
-static cl::opt<bool> EnableMaskedInterleavedMemAccesses(
-    "enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden,
-    cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"));
-
 /// We don't interleave loops with a known constant trip count below this
 /// number.
 static const unsigned TinyTripCountInterleaveThreshold = 128;
@@ -412,10 +408,8 @@ public:
   /// Construct the vector value of a scalarized value \p V one lane at a time.
   void packScalarIntoVectorValue(Value *V, const VPIteration &Instance);
 
-  /// Try to vectorize the interleaved access group that \p Instr belongs to,
-  /// optionally masking the vector operations if \p BlockInMask is non-null.
-  void vectorizeInterleaveGroup(Instruction *Instr,
-                                VectorParts *BlockInMask = nullptr);
+  /// Try to vectorize the interleaved access group that \p Instr belongs to.
+  void vectorizeInterleaveGroup(Instruction *Instr);
 
   /// Vectorize Load and Store instructions, optionally masking the vector
   /// operations if \p BlockInMask is non-null.
@@ -1118,11 +1112,6 @@ public:
   /// access that can be widened.
   bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1);
 
-  /// Returns true if \p I is a memory instruction in an interleaved-group
-  /// of memory accesses that can be vectorized with wide vector loads/stores
-  /// and shuffles.
-  bool interleavedAccessCanBeWidened(Instruction *I, unsigned VF = 1);
-
   /// Check if \p Instr belongs to any interleaved access group.
   bool isAccessInterleaved(Instruction *Instr) {
     return InterleaveInfo.isInterleaved(Instr);
@@ -1957,8 +1946,7 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
 //   %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
 //       <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>    ; Interleave R,G,B elements
 //   store <12 x i32> %interleaved.vec             ; Write 4 tuples of R,G,B
-void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
-                                                   VectorParts *BlockInMask) {
+void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
   const InterleaveGroup *Group = Cost->getInterleavedAccessGroup(Instr);
   assert(Group && "Fail to get an interleaved access group.");
 
@@ -1980,15 +1968,6 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
   SmallVector<Value *, 2> NewPtrs;
   unsigned Index = Group->getIndex(Instr);
 
-  VectorParts Mask;
-  bool IsMaskRequired = BlockInMask;
-  if (IsMaskRequired) {
-    Mask = *BlockInMask;
-    // TODO: extend the masked interleaved-group support to reversed access.
-    assert(!Group->isReverse() && "Reversed masked interleave-group "
-                                  "not supported.");
-  }
-
   // If the group is reverse, adjust the index to refer to the last vector lane
   // instead of the first. We adjust the index from the first vector lane,
   // rather than directly getting the pointer for lane VF - 1, because the
@@ -2032,19 +2011,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
 
   // For each unroll part, create a wide load for the group.
   SmallVector<Value *, 2> NewLoads;
   for (unsigned Part = 0; Part < UF; Part++) {
-    Instruction *NewLoad;
-    if (IsMaskRequired) {
-      auto *Undefs = UndefValue::get(Mask[Part]->getType());
-      auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
-      Value *ShuffledMask = Builder.CreateShuffleVector(
-          Mask[Part], Undefs, RepMask, "interleaved.mask");
-      NewLoad = Builder.CreateMaskedLoad(NewPtrs[Part], Group->getAlignment(),
-                                         ShuffledMask, UndefVec,
-                                         "wide.masked.vec");
-    }
-    else
-      NewLoad = Builder.CreateAlignedLoad(NewPtrs[Part],
-                                          Group->getAlignment(), "wide.vec");
+    auto *NewLoad = Builder.CreateAlignedLoad(
+        NewPtrs[Part], Group->getAlignment(), "wide.vec");
     Group->addMetadata(NewLoad);
     NewLoads.push_back(NewLoad);
   }
@@ -2111,18 +2079,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
     Value *IVec = Builder.CreateShuffleVector(WideVec, UndefVec, IMask,
                                               "interleaved.vec");
 
-    Instruction *NewStoreInstr;
-    if (IsMaskRequired) {
-      auto *Undefs = UndefValue::get(Mask[Part]->getType());
-      auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
-      Value *ShuffledMask = Builder.CreateShuffleVector(
-          Mask[Part], Undefs, RepMask, "interleaved.mask");
-      NewStoreInstr = Builder.CreateMaskedStore(
-          IVec, NewPtrs[Part], Group->getAlignment(), ShuffledMask);
-    }
-    else
-      NewStoreInstr = Builder.CreateAlignedStore(IVec, NewPtrs[Part],
-                                                 Group->getAlignment());
+    Instruction *NewStoreInstr =
+        Builder.CreateAlignedStore(IVec, NewPtrs[Part], Group->getAlignment());
 
     Group->addMetadata(NewStoreInstr);
   }
@@ -4295,32 +4253,6 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne
   return false;
 }
 
-static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
-  if (!(EnableMaskedInterleavedMemAccesses.getNumOccurrences() > 0))
-    return TTI.enableMaskedInterleavedAccessVectorization();
-
-  // If an override option has been passed in for interleaved accesses, use it.
-  return EnableMaskedInterleavedMemAccesses;
-}
-
-bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
-                                                               unsigned VF) {
-  assert(isAccessInterleaved(I) && "Expecting interleaved access.");
-  assert(getWideningDecision(I, VF) == CM_Unknown &&
-         "Decision should not be set yet.");
-
-  if (!Legal->blockNeedsPredication(I->getParent()) ||
-      !Legal->isMaskRequired(I))
-    return true;
-
-  if (!useMaskedInterleavedAccesses(TTI))
-    return false;
-
-  auto *Ty = getMemInstValueType(I);
-  return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty)
-                          : TTI.isLegalMaskedStore(Ty);
-}
-
 bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
                                                                unsigned VF) {
   // Get and ensure we have a valid memory instruction.
@@ -5439,17 +5371,13 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
   }
 
   // Calculate the cost of the whole interleaved group.
-  unsigned Cost = TTI.getInterleavedMemoryOpCost(
-      I->getOpcode(), WideVecTy, Group->getFactor(), Indices,
-      Group->getAlignment(), AS, Legal->isMaskRequired(I));
-
-  if (Group->isReverse()) {
-    // TODO: Add support for reversed masked interleaved access.
-    assert(!Legal->isMaskRequired(I) &&
-           "Reverse masked interleaved access not supported.");
+  unsigned Cost = TTI.getInterleavedMemoryOpCost(I->getOpcode(), WideVecTy,
+                                                 Group->getFactor(), Indices,
+                                                 Group->getAlignment(), AS);
+
+  if (Group->isReverse())
     Cost += Group->getNumMembers() *
             TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
-  }
 
   return Cost;
 }
@@ -5551,8 +5479,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
           continue;
 
         NumAccesses = Group->getNumMembers();
-        if (interleavedAccessCanBeWidened(&I, VF))
-          InterleaveCost = getInterleaveGroupCost(&I, VF);
+        InterleaveCost = getInterleaveGroupCost(&I, VF);
       }
 
       unsigned GatherScatterCost =
@@ -6225,8 +6152,7 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
 }
 
 VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
-                                                           VFRange &Range,
-                                                           VPlanPtr &Plan) {
+                                                           VFRange &Range) {
   const InterleaveGroup *IG = CM.getInterleavedAccessGroup(I);
   if (!IG)
     return nullptr;
@@ -6248,11 +6174,7 @@ VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
   assert(I == IG->getInsertPos() &&
          "Generating a recipe for an adjunct member of an interleave group");
 
-  VPValue *Mask = nullptr;
-  if (Legal->isMaskRequired(I))
-    Mask = createBlockInMask(I->getParent(), Plan);
-
-  return new VPInterleaveRecipe(IG, Mask);
+  return new VPInterleaveRecipe(IG);
 }
 
 VPWidenMemoryInstructionRecipe *
@@ -6520,7 +6442,7 @@ bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
   VPRecipeBase *Recipe = nullptr;
   // Check if Instr should belong to an interleave memory recipe, or already
   // does. In the latter case Instr is irrelevant.
-  if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) {
+  if ((Recipe = tryToInterleaveMemory(Instr, Range))) {
     VPBB->appendRecipe(Recipe);
     return true;
   }
@@ -6747,10 +6669,6 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const {
   O << " +\n"
     << Indent << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
   IG->getInsertPos()->printAsOperand(O, false);
-  if (User) {
-    O << ", ";
-    User->getOperand(0)->printAsOperand(O);
-  }
   O << "\\l\"";
   for (unsigned i = 0; i < IG->getFactor(); ++i)
     if (Instruction *I = IG->getMember(i))
@@ -6813,15 +6731,7 @@ void VPBlendRecipe::execute(VPTransformState &State) {
 
 void VPInterleaveRecipe::execute(VPTransformState &State) {
   assert(!State.Instance && "Interleave group being replicated.");
-  if (!User)
-    return State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
-
-  // Last (and currently only) operand is a mask.
-  InnerLoopVectorizer::VectorParts MaskValues(State.UF);
-  VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
-  for (unsigned Part = 0; Part < State.UF; ++Part)
-    MaskValues[Part] = State.get(Mask, Part);
-  State.ILV->vectorizeInterleaveGroup(IG->getInsertPos(), &MaskValues);
+  State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
 }
 
 void VPReplicateRecipe::execute(VPTransformState &State) {
@@ -7120,7 +7030,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
 
   // Analyze interleaved memory accesses.
   if (UseInterleaved) {
-    IAI.analyzeInterleaving(useMaskedInterleavedAccesses(*TTI));
+    IAI.analyzeInterleaving();
  }
 
   // Use the cost model.
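The deleted masked load/store path above widened the per-iteration block mask with createReplicatedMask, which this patch also removes from VectorUtils.cpp. A self-contained sketch of the index pattern that helper computed, using plain integers in place of LLVM Constant values (replicatedMask is a hypothetical stand-in, not a function from the patch):

  #include <cstdio>
  #include <vector>

  // Mirrors the loop structure of the removed createReplicatedMask: each
  // lane index 0..VF-1 is repeated ReplicationFactor times, so a mask with
  // one bit per scalar iteration can be shuffled out to one bit per member
  // of the interleave group.
  static std::vector<unsigned> replicatedMask(unsigned ReplicationFactor,
                                              unsigned VF) {
    std::vector<unsigned> Mask;
    for (unsigned I = 0; I < VF; ++I)
      for (unsigned J = 0; J < ReplicationFactor; ++J)
        Mask.push_back(I);
    return Mask;
  }

  int main() {
    // For an interleave factor of 3 and VF = 4 this prints:
    // 0 0 0 1 1 1 2 2 2 3 3 3
    for (unsigned Idx : replicatedMask(3, 4))
      std::printf("%u ", Idx);
    std::printf("\n");
  }

Shuffling the VF-wide block mask with this pattern produced the <VF*Factor>-wide "interleaved.mask" that the removed CreateMaskedLoad/CreateMaskedStore calls consumed.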
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 15d38ac9c84..f43a8bb123b 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -69,8 +69,7 @@ public:
   /// \return value is <true, nullptr>, as it is handled by another recipe.
   /// \p Range.End may be decreased to ensure same decision from \p Range.Start
   /// to \p Range.End.
-  VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range,
-                                            VPlanPtr &Plan);
+  VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range);
 
   /// Check if \I is a memory instruction to be widened for \p Range.Start and
   /// potentially masked. Such instructions are handled by a recipe that takes
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 81b1986c97d..c3123b41600 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -769,14 +769,10 @@ public:
 class VPInterleaveRecipe : public VPRecipeBase {
 private:
   const InterleaveGroup *IG;
-  std::unique_ptr<VPUser> User;
 
 public:
-  VPInterleaveRecipe(const InterleaveGroup *IG, VPValue *Mask)
-      : VPRecipeBase(VPInterleaveSC), IG(IG) {
-    if (Mask) // Create a VPInstruction to register as a user of the mask.
-      User.reset(new VPUser({Mask}));
-  }
+  VPInterleaveRecipe(const InterleaveGroup *IG)
+      : VPRecipeBase(VPInterleaveSC), IG(IG) {}
  ~VPInterleaveRecipe() override = default;
 
   /// Method to support type inquiry through isa, cast, and dyn_cast.
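With the mask operand gone, the VPlan side reduces to the unmasked path. A sketch of what remains, taken from the hunks above (IG stands for the InterleaveGroup pointer the cost model hands to tryToInterleaveMemory):

  // Construction: the group is now the recipe's only state. Before this
  // patch the recipe could also carry a mask from createBlockInMask().
  VPInterleaveRecipe *Recipe = new VPInterleaveRecipe(IG);

  // Execution: unconditionally takes the unmasked code-generation path.
  // State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());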