Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 114
1 file changed, 82 insertions, 32 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ffa6b242e00..23d4a6b2166 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -172,6 +172,8 @@ static cl::opt<bool> EnableInterleavedMemAccesses(
     "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
     cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
 
+/// An interleave-group may need masking if it resides in a block that needs
+/// predication, or in order to mask away gaps.
 static cl::opt<bool> EnableMaskedInterleavedMemAccesses(
     "enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden,
     cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"));
@@ -1134,11 +1136,15 @@ public:
   }
 
   /// Returns true if an interleaved group requires a scalar iteration
-  /// to handle accesses with gaps.
+  /// to handle accesses with gaps, and there is nothing preventing us from
+  /// creating a scalar epilogue.
   bool requiresScalarEpilogue() const {
-    return InterleaveInfo.requiresScalarEpilogue();
+    return IsScalarEpilogueAllowed && InterleaveInfo.requiresScalarEpilogue();
   }
 
+  /// Returns true if a scalar epilogue is not allowed due to optsize.
+  bool isScalarEpilogueAllowed() const { return IsScalarEpilogueAllowed; }
+
   /// Returns true if all loop blocks should be masked to fold tail loop.
   bool foldTailByMasking() const { return FoldTailByMasking; }
 
@@ -1229,6 +1235,15 @@ private:
   /// vectorization as a predicated block.
   SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization;
 
+  /// Records whether it is allowed to have the original scalar loop execute at
+  /// least once. This may be needed as a fallback loop in case runtime
+  /// aliasing/dependence checks fail, or to handle the tail/remainder
+  /// iterations when the trip count is unknown or doesn't divide by the VF,
+  /// or as a peel-loop to handle gaps in interleave-groups.
+  /// Under optsize and when the trip count is very small we don't allow any
+  /// iterations to execute in the scalar loop.
+  bool IsScalarEpilogueAllowed = true;
+
   /// All blocks of loop are to be masked to fold tail of scalar iterations.
   bool FoldTailByMasking = false;
 
@@ -1938,6 +1953,17 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
                                      "reverse");
 }
 
+// Return whether we allow using masked interleave-groups (for dealing with
+// strided loads/stores that reside in predicated blocks, or for dealing
+// with gaps).
+static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
+  // If an override option has been passed in for interleaved accesses, use it.
+  if (EnableMaskedInterleavedMemAccesses.getNumOccurrences() > 0)
+    return EnableMaskedInterleavedMemAccesses;
+
+  return TTI.enableMaskedInterleavedAccessVectorization();
+}
+
 // Try to vectorize the interleave group that \p Instr belongs to.
 //
 // E.g. Translate following interleaved load group (factor = 3):
@@ -1990,12 +2016,12 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
   unsigned Index = Group->getIndex(Instr);
 
   VectorParts Mask;
-  bool IsMaskRequired = BlockInMask;
-  if (IsMaskRequired) {
+  bool IsMaskForCondRequired = BlockInMask;
+  if (IsMaskForCondRequired) {
     Mask = *BlockInMask;
     // TODO: extend the masked interleaved-group support to reversed access.
     assert(!Group->isReverse() && "Reversed masked interleave-group "
-                                 "not supported.");
+                                  "not supported.");
   }
 
   // If the group is reverse, adjust the index to refer to the last vector lane
@@ -2036,20 +2062,35 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
   setDebugLocFromInst(Builder, Instr);
   Value *UndefVec = UndefValue::get(VecTy);
 
+  Value *MaskForGaps = nullptr;
+  if (Group->requiresScalarEpilogue() && !Cost->isScalarEpilogueAllowed()) {
+    MaskForGaps = createBitMaskForGaps(Builder, VF, *Group);
+    assert(MaskForGaps && "Mask for Gaps is required but it is null");
+  }
+
   // Vectorize the interleaved load group.
   if (isa<LoadInst>(Instr)) {
     // For each unroll part, create a wide load for the group.
     SmallVector<Value *, 2> NewLoads;
 
     for (unsigned Part = 0; Part < UF; Part++) {
       Instruction *NewLoad;
-      if (IsMaskRequired) {
-        auto *Undefs = UndefValue::get(Mask[Part]->getType());
-        auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
-        Value *ShuffledMask = Builder.CreateShuffleVector(
-            Mask[Part], Undefs, RepMask, "interleaved.mask");
-        NewLoad = Builder.CreateMaskedLoad(NewPtrs[Part], Group->getAlignment(),
-                                           ShuffledMask, UndefVec,
-                                           "wide.masked.vec");
+      if (IsMaskForCondRequired || MaskForGaps) {
+        assert(useMaskedInterleavedAccesses(*TTI) &&
+               "masked interleaved groups are not allowed.");
+        Value *GroupMask = MaskForGaps;
+        if (IsMaskForCondRequired) {
+          auto *Undefs = UndefValue::get(Mask[Part]->getType());
+          auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
+          Value *ShuffledMask = Builder.CreateShuffleVector(
+              Mask[Part], Undefs, RepMask, "interleaved.mask");
+          GroupMask = MaskForGaps
+                          ? Builder.CreateBinOp(Instruction::And, ShuffledMask,
+                                                MaskForGaps)
+                          : ShuffledMask;
+        }
+        NewLoad =
+            Builder.CreateMaskedLoad(NewPtrs[Part], Group->getAlignment(),
+                                     GroupMask, UndefVec, "wide.masked.vec");
       } else
         NewLoad = Builder.CreateAlignedLoad(NewPtrs[Part],
@@ -2121,7 +2162,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
                                               "interleaved.vec");
 
     Instruction *NewStoreInstr;
-    if (IsMaskRequired) {
+    if (IsMaskForCondRequired) {
       auto *Undefs = UndefValue::get(Mask[Part]->getType());
       auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
       Value *ShuffledMask = Builder.CreateShuffleVector(
@@ -4333,29 +4374,32 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne
   return false;
 }
 
-static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
-  if (!(EnableMaskedInterleavedMemAccesses.getNumOccurrences() > 0))
-    return TTI.enableMaskedInterleavedAccessVectorization();
-
-  // If an override option has been passed in for interleaved accesses, use it.
-  return EnableMaskedInterleavedMemAccesses;
-}
-
 bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
                                                                unsigned VF) {
   assert(isAccessInterleaved(I) && "Expecting interleaved access.");
   assert(getWideningDecision(I, VF) == CM_Unknown &&
          "Decision should not be set yet.");
-
-  if (!Legal->blockNeedsPredication(I->getParent()) ||
-      !Legal->isMaskRequired(I))
+  auto *Group = getInterleavedAccessGroup(I);
+  assert(Group && "Must have a group.");
+
+  // Check if masking is required.
+  // A Group may need masking for one of two reasons: it resides in a block that
+  // needs predication, or it was decided to use masking to deal with gaps.
+  bool PredicatedAccessRequiresMasking =
+      Legal->blockNeedsPredication(I->getParent()) && Legal->isMaskRequired(I);
+  bool AccessWithGapsRequiresMasking =
+      Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed;
+  if (!PredicatedAccessRequiresMasking && !AccessWithGapsRequiresMasking)
     return true;
 
-  if (!useMaskedInterleavedAccesses(TTI))
-    return false;
+  // If masked interleaving is required, we expect that the user/target had
+  // enabled it, because otherwise it either wouldn't have been created or
+  // it should have been invalidated by the CostModel.
+  assert(useMaskedInterleavedAccesses(TTI) &&
+         "Masked interleave-groups for predicated accesses are not enabled.");
 
   auto *Ty = getMemInstValueType(I);
-  return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty)
+  return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty)
                           : TTI.isLegalMaskedStore(Ty);
 }
 
@@ -4606,9 +4650,13 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
   // Record that scalar epilogue is not allowed.
   LLVM_DEBUG(dbgs() << "LV: Not allowing scalar epilogue due to -Os/-Oz.\n");
 
+  IsScalarEpilogueAllowed = !OptForSize;
+
   // We don't create an epilogue when optimizing for size.
-  // Invalidate interleave groups that require an epilogue.
-  InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
+  // Invalidate interleave groups that require an epilogue if we can't mask
+  // the interleave-group.
+  if (!useMaskedInterleavedAccesses(TTI))
+    InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
 
   unsigned MaxVF = computeFeasibleMaxVF(OptForSize, TC);
 
@@ -5495,13 +5543,15 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
   }
 
   // Calculate the cost of the whole interleaved group.
+  bool UseMaskForGaps =
+      Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed;
   unsigned Cost = TTI.getInterleavedMemoryOpCost(
       I->getOpcode(), WideVecTy, Group->getFactor(), Indices,
-      Group->getAlignment(), AS, Legal->isMaskRequired(I));
+      Group->getAlignment(), AS, Legal->isMaskRequired(I), UseMaskForGaps);
 
   if (Group->isReverse()) {
     // TODO: Add support for reversed masked interleaved access.
-    assert(!Legal->isMaskRequired(I) &&
+    assert(!Legal->isMaskRequired(I) &&
            "Reverse masked interleaved access not supported.");
     Cost += Group->getNumMembers() *
             TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
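For readers unfamiliar with the gap-masking scheme the patch relies on, the following standalone C++ sketch is a minimal illustration, not code from the patch: buildGroupMask, HasMember, and BlockMask are hypothetical names chosen for clarity. In the patch itself the gap mask is produced by createBitMaskForGaps and, when the access is also predicated, combined with the replicated block mask via Builder.CreateBinOp(Instruction::And, ...). The sketch only shows which lanes of the wide VF * Factor vector end up enabled.

#include <cstdio>
#include <vector>

// Illustrative only: compute the per-lane mask a wide masked load of an
// interleave-group needs. Lane (i * Factor + j) of the wide vector belongs
// to member j of iteration i. Lanes of members that are absent from the
// group (the "gaps") are masked off so the wide load cannot fault on them;
// lanes of iterations whose block mask is false are masked off as well.
static std::vector<bool> buildGroupMask(unsigned VF, unsigned Factor,
                                        const std::vector<bool> &HasMember,
                                        const std::vector<bool> &BlockMask) {
  std::vector<bool> Mask(VF * Factor);
  for (unsigned I = 0; I < VF; ++I)
    for (unsigned J = 0; J < Factor; ++J)
      // Equivalent to ANDing the replicated block mask with the gap mask.
      Mask[I * Factor + J] = BlockMask[I] && HasMember[J];
  return Mask;
}

int main() {
  // A factor-3 group where only members 0 and 1 exist (member 2 is a gap),
  // vectorized with VF = 4 under an all-true block mask.
  std::vector<bool> HasMember = {true, true, false};
  std::vector<bool> BlockMask = {true, true, true, true};
  std::vector<bool> Mask = buildGroupMask(4, 3, HasMember, BlockMask);
  for (bool B : Mask)
    std::printf("%d ", B ? 1 : 0);
  std::printf("\n"); // prints: 1 1 0 1 1 0 1 1 0 1 1 0
  return 0;
}

This is the reason the cost model now passes UseMaskForGaps to getInterleavedMemoryOpCost and why requiresScalarEpilogue() also checks IsScalarEpilogueAllowed: previously a group with gaps always forced a peeled scalar iteration so the wide load could not run past the end of the underlying data, whereas under -Os/-Oz no epilogue is allowed and the gap lanes are masked instead. The path is exercised when the target's enableMaskedInterleavedAccessVectorization() hook returns true, or when the -enable-masked-interleaved-mem-accesses override defined in this file is passed explicitly.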