Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 114
1 file changed, 82 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ffa6b242e00..23d4a6b2166 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -172,6 +172,8 @@ static cl::opt<bool> EnableInterleavedMemAccesses(
"enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
+/// An interleave-group may need masking if it resides in a block that needs
+/// predication, or in order to mask away gaps.
static cl::opt<bool> EnableMaskedInterleavedMemAccesses(
"enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"));
@@ -1134,11 +1136,15 @@ public:
}
/// Returns true if an interleaved group requires a scalar iteration
- /// to handle accesses with gaps.
+ /// to handle accesses with gaps, and there is nothing preventing us from
+ /// creating a scalar epilogue.
bool requiresScalarEpilogue() const {
- return InterleaveInfo.requiresScalarEpilogue();
+ return IsScalarEpilogueAllowed && InterleaveInfo.requiresScalarEpilogue();
}
+ /// Returns true if a scalar epilogue is allowed, i.e., was not disabled due to optsize.
+ bool isScalarEpilogueAllowed() const { return IsScalarEpilogueAllowed; }
+
/// Returns true if all loop blocks should be masked to fold tail loop.
bool foldTailByMasking() const { return FoldTailByMasking; }
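
A minimal sketch of why a group with a trailing gap forces either a scalar epilogue or a gap mask (assumed helper, illustrative index arithmetic only; not LLVM API): the last wide load covers VF * Factor consecutive elements, which can extend past the last element the scalar loop actually touches.

  // Assumed sketch: does the final wide load of a gapped group read past the
  // scalar loop's last access? N = trip count (assumed a multiple of VF),
  // F = interleave factor, LastMember = highest member index accessed.
  bool wideLoadOverReads(unsigned N, unsigned VF, unsigned F,
                         unsigned LastMember) {
    unsigned LastScalarAccess = F * (N - 1) + LastMember;
    unsigned LastWideAccess = F * (N - VF) + VF * F - 1; // == F * N - 1
    return LastWideAccess > LastScalarAccess; // true iff LastMember < F - 1
  }
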
@@ -1229,6 +1235,15 @@ private:
/// vectorization as a predicated block.
SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization;
+ /// Records whether it is allowed to have the original scalar loop execute at
+ /// least once. This may be needed as a fallback loop in case runtime
+ /// aliasing/dependence checks fail, or to handle the tail/remainder
+ /// iterations when the trip count is unknown or is not a multiple of the VF,
+ /// or as a peel-loop to handle gaps in interleave-groups.
+ /// Under optsize, and when the trip count is very small, we don't allow any
+ /// iterations to execute in the scalar loop.
+ bool IsScalarEpilogueAllowed = true;
+
/// All blocks of loop are to be masked to fold tail of scalar iterations.
bool FoldTailByMasking = false;
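
As a concrete illustration of the tail/remainder case mentioned above (assumed numbers, not from the patch): with a trip count of 10 and a VF of 4, two iterations remain after the vector loop; normally they run in the scalar epilogue, while under optsize they must instead be folded into the vector loop by masking.

  // Assumed sketch of the remainder arithmetic behind the comment above.
  // N = 10, VF = 4: the vector loop covers 8 iterations, and N % VF = 2
  // iterations are left for the scalar epilogue (or for tail folding).
  unsigned vectorLoopIterations(unsigned N, unsigned VF) { return N / VF; }
  unsigned remainderIterations(unsigned N, unsigned VF) { return N % VF; }
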
@@ -1938,6 +1953,17 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
"reverse");
}
+// Return whether we allow using masked interleave-groups (for dealing with
+// strided loads/stores that reside in predicated blocks, or for dealing
+// with gaps).
+static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
+ // If an override option has been passed in for interleaved accesses, use it.
+ if (EnableMaskedInterleavedMemAccesses.getNumOccurrences() > 0)
+ return EnableMaskedInterleavedMemAccesses;
+
+ return TTI.enableMaskedInterleavedAccessVectorization();
+}
+
// Try to vectorize the interleave group that \p Instr belongs to.
//
// E.g. Translate following interleaved load group (factor = 3):
@@ -1990,12 +2016,12 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
unsigned Index = Group->getIndex(Instr);
VectorParts Mask;
- bool IsMaskRequired = BlockInMask;
- if (IsMaskRequired) {
+ bool IsMaskForCondRequired = BlockInMask;
+ if (IsMaskForCondRequired) {
Mask = *BlockInMask;
// TODO: extend the masked interleaved-group support to reversed access.
assert(!Group->isReverse() && "Reversed masked interleave-group "
- "not supported.");
+ "not supported.");
}
// If the group is reverse, adjust the index to refer to the last vector lane
@@ -2036,20 +2062,35 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
setDebugLocFromInst(Builder, Instr);
Value *UndefVec = UndefValue::get(VecTy);
+ Value *MaskForGaps = nullptr;
+ if (Group->requiresScalarEpilogue() && !Cost->isScalarEpilogueAllowed()) {
+ MaskForGaps = createBitMaskForGaps(Builder, VF, *Group);
+ assert(MaskForGaps && "Mask for Gaps is required but it is null");
+ }
+
// Vectorize the interleaved load group.
if (isa<LoadInst>(Instr)) {
// For each unroll part, create a wide load for the group.
SmallVector<Value *, 2> NewLoads;
for (unsigned Part = 0; Part < UF; Part++) {
Instruction *NewLoad;
- if (IsMaskRequired) {
- auto *Undefs = UndefValue::get(Mask[Part]->getType());
- auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
- Value *ShuffledMask = Builder.CreateShuffleVector(
- Mask[Part], Undefs, RepMask, "interleaved.mask");
- NewLoad = Builder.CreateMaskedLoad(NewPtrs[Part], Group->getAlignment(),
- ShuffledMask, UndefVec,
- "wide.masked.vec");
+ if (IsMaskForCondRequired || MaskForGaps) {
+ assert(useMaskedInterleavedAccesses(*TTI) &&
+ "masked interleaved groups are not allowed.");
+ Value *GroupMask = MaskForGaps;
+ if (IsMaskForCondRequired) {
+ auto *Undefs = UndefValue::get(Mask[Part]->getType());
+ auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
+ Value *ShuffledMask = Builder.CreateShuffleVector(
+ Mask[Part], Undefs, RepMask, "interleaved.mask");
+ GroupMask = MaskForGaps
+ ? Builder.CreateBinOp(Instruction::And, ShuffledMask,
+ MaskForGaps)
+ : ShuffledMask;
+ }
+ NewLoad =
+ Builder.CreateMaskedLoad(NewPtrs[Part], Group->getAlignment(),
+ GroupMask, UndefVec, "wide.masked.vec");
}
else
NewLoad = Builder.CreateAlignedLoad(NewPtrs[Part],
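
A minimal plain-C++ model of the mask arithmetic in this hunk (assumed bool-vector shapes standing in for the <VF*Factor x i1> values; this is not the LLVM API): the block mask carries one bit per scalar iteration and is replicated Factor times, the gap mask carries one bit per interleaved lane, and the final group mask is their conjunction.

  #include <vector>

  // Assumed model: BlockMask has VF bits (loop predicate), HasMember has
  // Factor bits (false at gap positions). Lane I of the wide vector belongs
  // to scalar iteration I / Factor and to group member I % Factor.
  std::vector<bool> makeGroupMask(const std::vector<bool> &BlockMask,
                                  const std::vector<bool> &HasMember,
                                  unsigned VF, unsigned Factor) {
    std::vector<bool> GroupMask(VF * Factor);
    for (unsigned I = 0; I < VF * Factor; ++I)
      GroupMask[I] = BlockMask[I / Factor] && HasMember[I % Factor];
    return GroupMask;
  }
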
@@ -2121,7 +2162,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
"interleaved.vec");
Instruction *NewStoreInstr;
- if (IsMaskRequired) {
+ if (IsMaskForCondRequired) {
auto *Undefs = UndefValue::get(Mask[Part]->getType());
auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
Value *ShuffledMask = Builder.CreateShuffleVector(
@@ -4333,29 +4374,32 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne
return false;
}
-static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
- if (!(EnableMaskedInterleavedMemAccesses.getNumOccurrences() > 0))
- return TTI.enableMaskedInterleavedAccessVectorization();
-
- // If an override option has been passed in for interleaved accesses, use it.
- return EnableMaskedInterleavedMemAccesses;
-}
-
bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
unsigned VF) {
assert(isAccessInterleaved(I) && "Expecting interleaved access.");
assert(getWideningDecision(I, VF) == CM_Unknown &&
"Decision should not be set yet.");
-
- if (!Legal->blockNeedsPredication(I->getParent()) ||
- !Legal->isMaskRequired(I))
+ auto *Group = getInterleavedAccessGroup(I);
+ assert(Group && "Must have a group.");
+
+ // Check if masking is required.
+ // A Group may need masking for one of two reasons: it resides in a block that
+ // needs predication, or it was decided to use masking to deal with gaps.
+ bool PredicatedAccessRequiresMasking =
+ Legal->blockNeedsPredication(I->getParent()) && Legal->isMaskRequired(I);
+ bool AccessWithGapsRequiresMasking =
+ Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed;
+ if (!PredicatedAccessRequiresMasking && !AccessWithGapsRequiresMasking)
return true;
- if (!useMaskedInterleavedAccesses(TTI))
- return false;
+ // If masked interleaving is required, we expect that the user/target has
+ // enabled it, because otherwise the group either wouldn't have been
+ // created or would have been invalidated by the cost model.
+ assert(useMaskedInterleavedAccesses(TTI) &&
+ "Masked interleave-groups for predicated accesses are not enabled.");
auto *Ty = getMemInstValueType(I);
- return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty)
+ return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty)
: TTI.isLegalMaskedStore(Ty);
}
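
For illustration, a hypothetical loop (not from this patch) that takes the PredicatedAccessRequiresMasking path: the interleaved accesses sit in an if-guarded block, so the group can only be vectorized with a mask.

  // Hypothetical factor-2 interleave-group inside a predicated block.
  int sumPredicated(const int *A, const bool *Cond, int n) {
    int Sum = 0;
    for (int i = 0; i < n; ++i)
      if (Cond[i]) {         // block needs predication => mask required
        Sum += A[2 * i];     // member 0
        Sum += A[2 * i + 1]; // member 1
      }
    return Sum;
  }
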
@@ -4606,9 +4650,13 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
// Record that scalar epilogue is not allowed.
LLVM_DEBUG(dbgs() << "LV: Not allowing scalar epilogue due to -Os/-Oz.\n");
+ IsScalarEpilogueAllowed = !OptForSize;
+
// We don't create an epilogue when optimizing for size.
- // Invalidate interleave groups that require an epilogue.
- InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
+ // Invalidate interleave groups that require an epilogue if we can't mask
+ // the interleave-group.
+ if (!useMaskedInterleavedAccesses(TTI))
+ InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
unsigned MaxVF = computeFeasibleMaxVF(OptForSize, TC);
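
A compact sketch of the policy this hunk implements for a group that requires an epilogue (assumed enum and helper, for exposition only):

  // Assumed sketch: fate of an interleave-group with gaps under the new policy.
  enum class GapGroupFate { ScalarEpilogue, MaskedAccess, Invalidated };

  GapGroupFate classifyGapGroup(bool OptForSize, bool MaskedInterleaveOK) {
    if (!OptForSize)
      return GapGroupFate::ScalarEpilogue; // peel last iterations, as before
    if (MaskedInterleaveOK)
      return GapGroupFate::MaskedAccess;   // new: mask away the gap lanes
    return GapGroupFate::Invalidated;      // members handled as separate accesses
  }
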
@@ -5495,13 +5543,15 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
}
// Calculate the cost of the whole interleaved group.
+ bool UseMaskForGaps =
+ Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed;
unsigned Cost = TTI.getInterleavedMemoryOpCost(
I->getOpcode(), WideVecTy, Group->getFactor(), Indices,
- Group->getAlignment(), AS, Legal->isMaskRequired(I));
+ Group->getAlignment(), AS, Legal->isMaskRequired(I), UseMaskForGaps);
if (Group->isReverse()) {
// TODO: Add support for reversed masked interleaved access.
- assert(!Legal->isMaskRequired(I) &&
+ assert(!Legal->isMaskRequired(I) &&
"Reverse masked interleaved access not supported.");
Cost += Group->getNumMembers() *
TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
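
The reversed-group term at the end can be illustrated with a small worked formula (assumed cost numbers, not real target data): each member needs one SK_Reverse shuffle on top of the interleaved memory-op cost.

  // Assumed arithmetic: e.g. MemOpCost = 12, 3 members, reverse shuffle
  // cost 2 each => total 12 + 3 * 2 = 18.
  unsigned reversedGroupCost(unsigned MemOpCost, unsigned NumMembers,
                             unsigned ReverseShuffleCost) {
    return MemOpCost + NumMembers * ReverseShuffleCost;
  }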