diff options
author | Anton Afanasyev <anton.a.afanasyev@gmail.com> | 2019-11-21 18:41:52 +0300 |
---|---|---|
committer | Anton Afanasyev <anton.a.afanasyev@gmail.com> | 2019-12-03 19:29:27 +0300 |
commit | a315519c17abaa621eddd30fd116ac2e030a36e9 (patch) | |
tree | bc61fc6262033e5c997fd199df3f30dd6e64b789 /llvm/lib/Transforms | |
parent | c094e7dc4b3f9d1c1e590b008bb1cc46e3496abd (diff) | |
download | bcm5719-llvm-a315519c17abaa621eddd30fd116ac2e030a36e9.tar.gz bcm5719-llvm-a315519c17abaa621eddd30fd116ac2e030a36e9.zip |
[SLP] Enhance SLPVectorizer to vectorize different combinations of aggregates
Summary:
Make SLPVectorize to recognize homogeneous aggregates like
`{<2 x float>, <2 x float>}`, `{{float, float}, {float, float}}`,
`[2 x {float, float}]` and so on.
It's a follow-up of https://reviews.llvm.org/D70068.
Merged `findBuildVector()` and `findBuildAggregate()` to
one `findBuildAggregate()` function making it recursive
to recognize multidimensional aggregates. Aggregates required
to be homogeneous.
Reviewers: RKSimon, ABataev, dtemirbulatov, spatel, vporpo
Subscribers: hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70587
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 118 |
1 files changed, 56 insertions, 62 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index e4f50a8787d..949988415a4 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -629,9 +629,10 @@ public: return MinVecRegSize; } - /// Check if ArrayType or StructType is isomorphic to some VectorType. - /// Accepts homogeneous aggregate of vectors like - /// { <2 x float>, <2 x float> } + /// Check if homogeneous aggregate is isomorphic to some VectorType. + /// Accepts homogeneous multidimensional aggregate of scalars/vectors like + /// {[4 x i16], [4 x i16]}, { <2 x float>, <2 x float> }, + /// {{{i16, i16}, {i16, i16}}, {{i16, i16}, {i16, i16}}} and so on. /// /// \returns number of elements in vector if isomorphism exists, 0 otherwise. unsigned canMapToVector(Type *T, const DataLayout &DL) const; @@ -3088,20 +3089,22 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, } unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const { - unsigned N; - Type *EltTy; - auto *ST = dyn_cast<StructType>(T); - if (ST) { - N = ST->getNumElements(); - EltTy = *ST->element_begin(); - } else { - N = cast<ArrayType>(T)->getNumElements(); - EltTy = cast<ArrayType>(T)->getElementType(); - } - - if (auto *VT = dyn_cast<VectorType>(EltTy)) { - EltTy = VT->getElementType(); - N *= VT->getNumElements(); + unsigned N = 1; + Type *EltTy = T; + + while (isa<CompositeType>(EltTy)) { + if (auto *ST = dyn_cast<StructType>(EltTy)) { + // Check that struct is homogeneous. + for (const auto *Ty : ST->elements()) + if (Ty != *ST->element_begin()) + return 0; + N *= ST->getNumElements(); + EltTy = *ST->element_begin(); + } else { + auto *SeqT = cast<SequentialType>(EltTy); + N *= SeqT->getNumElements(); + EltTy = SeqT->getElementType(); + } } if (!isValidElementType(EltTy)) @@ -3109,12 +3112,6 @@ unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const { uint64_t VTSize = DL.getTypeStoreSizeInBits(VectorType::get(EltTy, N)); if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL.getTypeStoreSizeInBits(T)) return 0; - if (ST) { - // Check that struct is homogeneous. - for (const auto *Ty : ST->elements()) - if (Ty != *ST->element_begin()) - return 0; - } return N; } @@ -6940,57 +6937,54 @@ private: /// %rb = insertelement <4 x float> %ra, float %s1, i32 1 /// %rc = insertelement <4 x float> %rb, float %s2, i32 2 /// %rd = insertelement <4 x float> %rc, float %s3, i32 3 -/// starting from the last insertelement instruction. +/// starting from the last insertelement or insertvalue instruction. /// -/// Returns true if it matches -static bool findBuildVector(InsertElementInst *LastInsertElem, - TargetTransformInfo *TTI, - SmallVectorImpl<Value *> &BuildVectorOpds, - int &UserCost) { - UserCost = 0; - Value *V = nullptr; - do { - if (auto *CI = dyn_cast<ConstantInt>(LastInsertElem->getOperand(2))) { - UserCost += TTI->getVectorInstrCost(Instruction::InsertElement, - LastInsertElem->getType(), - CI->getZExtValue()); - } - BuildVectorOpds.push_back(LastInsertElem->getOperand(1)); - V = LastInsertElem->getOperand(0); - if (isa<UndefValue>(V)) - break; - LastInsertElem = dyn_cast<InsertElementInst>(V); - if (!LastInsertElem || !LastInsertElem->hasOneUse()) - return false; - } while (true); - std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end()); - return true; -} - -/// Like findBuildVector, but looks for construction of aggregate. -/// Accepts homegeneous aggregate of vectors like { <2 x float>, <2 x float> }. +/// Also recognize aggregates like {<2 x float>, <2 x float>}, +/// {{float, float}, {float, float}}, [2 x {float, float}] and so on. +/// See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples. +/// +/// Assume LastInsertInst is of InsertElementInst or InsertValueInst type. /// /// \return true if it matches. -static bool findBuildAggregate(InsertValueInst *IV, TargetTransformInfo *TTI, +static bool findBuildAggregate(Value *LastInsertInst, TargetTransformInfo *TTI, SmallVectorImpl<Value *> &BuildVectorOpds, int &UserCost) { + assert((isa<InsertElementInst>(LastInsertInst) || + isa<InsertValueInst>(LastInsertInst)) && + "Expected insertelement or insertvalue instruction!"); UserCost = 0; do { - if (auto *IE = dyn_cast<InsertElementInst>(IV->getInsertedValueOperand())) { + Value *InsertedOperand; + if (auto *IE = dyn_cast<InsertElementInst>(LastInsertInst)) { + InsertedOperand = IE->getOperand(1); + LastInsertInst = IE->getOperand(0); + if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) { + UserCost += TTI->getVectorInstrCost(Instruction::InsertElement, + IE->getType(), CI->getZExtValue()); + } + } else { + auto *IV = cast<InsertValueInst>(LastInsertInst); + InsertedOperand = IV->getInsertedValueOperand(); + LastInsertInst = IV->getAggregateOperand(); + } + if (isa<InsertElementInst>(InsertedOperand) || + isa<InsertValueInst>(InsertedOperand)) { int TmpUserCost; - SmallVector<Value *, 4> TmpBuildVectorOpds; - if (!findBuildVector(IE, TTI, TmpBuildVectorOpds, TmpUserCost)) + SmallVector<Value *, 8> TmpBuildVectorOpds; + if (!findBuildAggregate(InsertedOperand, TTI, TmpBuildVectorOpds, + TmpUserCost)) return false; - BuildVectorOpds.append(TmpBuildVectorOpds.rbegin(), TmpBuildVectorOpds.rend()); + BuildVectorOpds.append(TmpBuildVectorOpds.rbegin(), + TmpBuildVectorOpds.rend()); UserCost += TmpUserCost; } else { - BuildVectorOpds.push_back(IV->getInsertedValueOperand()); + BuildVectorOpds.push_back(InsertedOperand); } - Value *V = IV->getAggregateOperand(); - if (isa<UndefValue>(V)) + if (isa<UndefValue>(LastInsertInst)) break; - IV = dyn_cast<InsertValueInst>(V); - if (!IV || !IV->hasOneUse()) + if ((!isa<InsertValueInst>(LastInsertInst) && + !isa<InsertElementInst>(LastInsertInst)) || + !LastInsertInst->hasOneUse()) return false; } while (true); std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end()); @@ -7177,7 +7171,7 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB, BoUpSLP &R) { int UserCost; SmallVector<Value *, 16> BuildVectorOpds; - if (!findBuildVector(IEI, TTI, BuildVectorOpds, UserCost) || + if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, UserCost) || (llvm::all_of(BuildVectorOpds, [](Value *V) { return isa<ExtractElementInst>(V); }) && isShuffle(BuildVectorOpds))) |