summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp133
1 files changed, 133 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 93a6d1c00a8..11bc030e312 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -158,6 +158,119 @@ static bool isSplat(ArrayRef<Value *> VL) {
return true;
}
+/// Checks if the vector of instructions can be represented as a shuffle, like:
+/// %x0 = extractelement <4 x i8> %x, i32 0
+/// %x3 = extractelement <4 x i8> %x, i32 3
+/// %y1 = extractelement <4 x i8> %y, i32 1
+/// %y2 = extractelement <4 x i8> %y, i32 2
+/// %x0x0 = mul i8 %x0, %x0
+/// %x3x3 = mul i8 %x3, %x3
+/// %y1y1 = mul i8 %y1, %y1
+/// %y2y2 = mul i8 %y2, %y2
+/// %ins1 = insertelement <4 x i8> undef, i8 %x0x0, i32 0
+/// %ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
+/// %ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
+/// %ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
+/// ret <4 x i8> %ins4
+/// can be transformed into:
+/// %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> <i32 0, i32 3, i32 5,
+/// i32 6>
+/// %2 = mul <4 x i8> %1, %1
+/// ret <4 x i8> %2
+/// We convert this initially to something like:
+/// %x0 = extractelement <4 x i8> %x, i32 0
+/// %x3 = extractelement <4 x i8> %x, i32 3
+/// %y1 = extractelement <4 x i8> %y, i32 1
+/// %y2 = extractelement <4 x i8> %y, i32 2
+/// %1 = insertelement <4 x i8> undef, i8 %x0, i32 0
+/// %2 = insertelement <4 x i8> %1, i8 %x3, i32 1
+/// %3 = insertelement <4 x i8> %2, i8 %y1, i32 2
+/// %4 = insertelement <4 x i8> %3, i8 %y2, i32 3
+/// %5 = mul <4 x i8> %4, %4
+/// %6 = extractelement <4 x i8> %5, i32 0
+/// %ins1 = insertelement <4 x i8> undef, i8 %6, i32 0
+/// %7 = extractelement <4 x i8> %5, i32 1
+/// %ins2 = insertelement <4 x i8> %ins1, i8 %7, i32 1
+/// %8 = extractelement <4 x i8> %5, i32 2
+/// %ins3 = insertelement <4 x i8> %ins2, i8 %8, i32 2
+/// %9 = extractelement <4 x i8> %5, i32 3
+/// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3
+/// ret <4 x i8> %ins4
+/// InstCombiner transforms this into a shuffle and vector mul
+static Optional<TargetTransformInfo::ShuffleKind>
+isShuffle(ArrayRef<Value *> VL) {
+ auto *EI0 = cast<ExtractElementInst>(VL[0]);
+ unsigned Size = EI0->getVectorOperandType()->getVectorNumElements();
+ Value *Vec1 = nullptr;
+ Value *Vec2 = nullptr;
+ enum ShuffleMode {Unknown, FirstAlternate, SecondAlternate, Permute};
+ ShuffleMode CommonShuffleMode = Unknown;
+ for (unsigned I = 0, E = VL.size(); I < E; ++I) {
+ auto *EI = cast<ExtractElementInst>(VL[I]);
+ auto *Vec = EI->getVectorOperand();
+ // All vector operands must have the same number of vector elements.
+ if (Vec->getType()->getVectorNumElements() != Size)
+ return None;
+ auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
+ if (!Idx)
+ return None;
+ // Undefined behavior if Idx is negative or >= Size.
+ if (Idx->getValue().uge(Size))
+ continue;
+ unsigned IntIdx = Idx->getValue().getZExtValue();
+ // We can extractelement from undef vector.
+ if (isa<UndefValue>(Vec))
+ continue;
+ // For correct shuffling we have to have at most 2 different vector operands
+ // in all extractelement instructions.
+ if (Vec1 && Vec2 && Vec != Vec1 && Vec != Vec2)
+ return None;
+ if (CommonShuffleMode == Permute)
+ continue;
+ // If the extract index is not the same as the operation number, it is a
+ // permutation.
+ if (IntIdx != I) {
+ CommonShuffleMode = Permute;
+ continue;
+ }
+ // Check the shuffle mode for the current operation.
+ if (!Vec1)
+ Vec1 = Vec;
+ else if (Vec != Vec1)
+ Vec2 = Vec;
+ // Example: shufflevector A, B, <0,5,2,7>
+ // I is odd and IntIdx for A == I - FirstAlternate shuffle.
+ // I is even and IntIdx for B == I - FirstAlternate shuffle.
+ // Example: shufflevector A, B, <4,1,6,3>
+ // I is even and IntIdx for A == I - SecondAlternate shuffle.
+ // I is odd and IntIdx for B == I - SecondAlternate shuffle.
+ const bool IIsEven = I & 1;
+ const bool CurrVecIsA = Vec == Vec1;
+ const bool IIsOdd = !IIsEven;
+ const bool CurrVecIsB = !CurrVecIsA;
+ ShuffleMode CurrentShuffleMode =
+ ((IIsOdd && CurrVecIsA) || (IIsEven && CurrVecIsB)) ? FirstAlternate
+ : SecondAlternate;
+ // Common mode is not set or the same as the shuffle mode of the current
+ // operation - alternate.
+ if (CommonShuffleMode == Unknown)
+ CommonShuffleMode = CurrentShuffleMode;
+ // Common shuffle mode is not the same as the shuffle mode of the current
+ // operation - permutation.
+ if (CommonShuffleMode != CurrentShuffleMode)
+ CommonShuffleMode = Permute;
+ }
+ // If we're not crossing lanes in different vectors, consider it as blending.
+ if ((CommonShuffleMode == FirstAlternate ||
+ CommonShuffleMode == SecondAlternate) &&
+ Vec2)
+ return TargetTransformInfo::SK_Alternate;
+ // If Vec2 was never used, we have a permutation of a single vector, otherwise
+ // we have permutation of 2 vectors.
+ return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc
+ : TargetTransformInfo::SK_PermuteSingleSrc;
+}
+
///\returns Opcode that can be clubbed with \p Op to create an alternate
/// sequence which can later be merged as a ShuffleVector instruction.
static unsigned getAltOpcode(unsigned Op) {
@@ -1736,6 +1849,26 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
if (isSplat(VL)) {
return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
}
+ if (getSameOpcode(VL) == Instruction::ExtractElement) {
+ Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
+ if (ShuffleKind.hasValue()) {
+ int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
+ for (auto *V : VL) {
+ // If all users of instruction are going to be vectorized and this
+ // instruction itself is not going to be vectorized, consider this
+ // instruction as dead and remove its cost from the final cost of the
+ // vectorized tree.
+ if (areAllUsersVectorized(cast<Instruction>(V)) &&
+ !ScalarToTreeEntry.count(V)) {
+ auto *IO = cast<ConstantInt>(
+ cast<ExtractElementInst>(V)->getIndexOperand());
+ Cost -= TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
+ IO->getZExtValue());
+ }
+ }
+ return Cost;
+ }
+ }
return getGatherCost(E->Scalars);
}
unsigned Opcode = getSameOpcode(VL);
OpenPOWER on IntegriCloud