summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2017-02-23 09:40:38 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2017-02-23 09:40:38 +0000
commit68f2402c6138b51221479bee285bcaddb495b629 (patch)
treeec531122bebc223083375636a5fc793ca93e9cd6 /llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
parent39af790bb1eb0079cd1584fead63f61612459da7 (diff)
downloadbcm5719-llvm-68f2402c6138b51221479bee285bcaddb495b629.tar.gz
bcm5719-llvm-68f2402c6138b51221479bee285bcaddb495b629.zip
[SLP] Fix for PR32036: Vectorized horizontal reduction returning wrong
result Summary: If the same value is used several times as an extra value, SLP vectorizer takes it into account only once instead of actual number of using. For example: ``` int val = 1; for (int y = 0; y < 8; y++) { for (int x = 0; x < 8; x++) { val = val + input[y * 8 + x] + 3; } } ``` We have 2 extra rguments: `1` - initial value of horizontal reduction and `3`, which is added 8*8 times to the reduction. Before the patch we added `1` to the reduction value and added once `3`, though it must be added 64 times. Reviewers: mkuper, mzolotukhin Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D30262 llvm-svn: 295949
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp32
1 files changed, 19 insertions, 13 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cbb0e7a0c31..20cc6384f07 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -304,6 +304,7 @@ public:
typedef SmallVector<Instruction *, 16> InstrList;
typedef SmallPtrSet<Value *, 16> ValueSet;
typedef SmallVector<StoreInst *, 8> StoreList;
+ typedef MapVector<Value *, std::vector<DebugLoc>> ExtraValueToDebugLocsMap;
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
@@ -333,7 +334,7 @@ public:
/// Vectorize the tree but with the list of externally used values \p
/// ExternallyUsedValues. Values in this MapVector can be replaced but the
/// generated extractvalue instructions.
- Value *vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues);
+ Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues);
/// \returns the cost incurred by unwanted spills and fills, caused by
/// holding live values over call sites.
@@ -352,7 +353,7 @@ public:
/// into account (anf updating it, if required) list of externally used
/// values stored in \p ExternallyUsedValues.
void buildTree(ArrayRef<Value *> Roots,
- MapVector<Value *, DebugLoc> &ExternallyUsedValues,
+ ExtraValueToDebugLocsMap &ExternallyUsedValues,
ArrayRef<Value *> UserIgnoreLst = None);
/// Clear the internal data structures that are created by 'buildTree'.
@@ -953,11 +954,11 @@ private:
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
ArrayRef<Value *> UserIgnoreLst) {
- MapVector<Value *, DebugLoc> ExternallyUsedValues;
+ ExtraValueToDebugLocsMap ExternallyUsedValues;
buildTree(Roots, ExternallyUsedValues, UserIgnoreLst);
}
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
- MapVector<Value *, DebugLoc> &ExternallyUsedValues,
+ ExtraValueToDebugLocsMap &ExternallyUsedValues,
ArrayRef<Value *> UserIgnoreLst) {
deleteTree();
UserIgnoreList = UserIgnoreLst;
@@ -2801,12 +2802,12 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, TreeEntry *E) {
}
Value *BoUpSLP::vectorizeTree() {
- MapVector<Value *, DebugLoc> ExternallyUsedValues;
+ ExtraValueToDebugLocsMap ExternallyUsedValues;
return vectorizeTree(ExternallyUsedValues);
}
Value *
-BoUpSLP::vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues) {
+BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
// All blocks must be scheduled before any instructions are inserted.
for (auto &BSIter : BlocksSchedules) {
@@ -2868,7 +2869,6 @@ BoUpSLP::vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues) {
assert(ExternallyUsedValues.count(Scalar) &&
"Scalar with nullptr as an external user must be registered in "
"ExternallyUsedValues map");
- DebugLoc DL = ExternallyUsedValues[Scalar];
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
Builder.SetInsertPoint(VecI->getParent(),
std::next(VecI->getIterator()));
@@ -2878,8 +2878,8 @@ BoUpSLP::vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues) {
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
Ex = extend(ScalarRoot, Ex, Scalar->getType());
CSEBlocks.insert(cast<Instruction>(Scalar)->getParent());
+ std::swap(ExternallyUsedValues[Ex], ExternallyUsedValues[Scalar]);
ExternallyUsedValues.erase(Scalar);
- ExternallyUsedValues[Ex] = DL;
continue;
}
@@ -4439,9 +4439,11 @@ public:
Builder.setFastMathFlags(Unsafe);
unsigned i = 0;
- MapVector<Value *, DebugLoc> ExternallyUsedValues;
+ BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
+ // The same extra argument may be used several time, so log each attempt
+ // to use it.
for (auto &Pair : ExtraArgs)
- ExternallyUsedValues[Pair.second] = Pair.first->getDebugLoc();
+ ExternallyUsedValues[Pair.second].push_back(Pair.first->getDebugLoc());
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
V.buildTree(VL, ExternallyUsedValues, ReductionOps);
@@ -4489,9 +4491,13 @@ public:
Builder.CreateBinOp(ReductionOpcode, VectorizedTree, I);
}
for (auto &Pair : ExternallyUsedValues) {
- Builder.SetCurrentDebugLocation(Pair.second);
- VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,
- Pair.first, "bin.extra");
+ // Add each externally used value to the final reduction.
+ assert(!Pair.second.empty() && "At least one DebugLoc must be added.");
+ for (auto &DL : Pair.second) {
+ Builder.SetCurrentDebugLocation(DL);
+ VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,
+ Pair.first, "bin.extra");
+ }
}
// Update users.
if (ReductionPHI && !isa<UndefValue>(ReductionPHI)) {
OpenPOWER on IntegriCloud