summary refs log tree commit diff stats
path: root/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
diff options
context:
space:
mode:
author Alexey Bataev <a.bataev@hotmail.com> 2017-01-25 09:54:38 +0000
committer Alexey Bataev <a.bataev@hotmail.com> 2017-01-25 09:54:38 +0000
commit d28ab559a7e4c60bb56e5944bffe5085b7b16072 (patch)
tree 5171c8ee62a5f8a2c91aa64b6c4bdbad7ca8986b /llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
parent 16f1e5f1cade241372b545b5addca9e6867497ed (diff)
download bcm5719-llvm-d28ab559a7e4c60bb56e5944bffe5085b7b16072.tar.gz
bcm5719-llvm-d28ab559a7e4c60bb56e5944bffe5085b7b16072.zip
[SLP] Improve horizontal vectorization for non-power-of-2 number of
instructions. If the number of instructions in a horizontal reduction list is not a power of 2, then only the last PowerOf2Floor(NumberOfInstructions) elements are actually vectorized, and the other instructions remain scalar. This patch tries to vectorize the remaining elements as well. Differential Revision: https://reviews.llvm.org/D28959 llvm-svn: 293042
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 6
1 files changed, 4 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3c06b7e3af8..ae5c32a16e4 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4262,7 +4262,7 @@ public:
Builder.setFastMathFlags(Unsafe);
unsigned i = 0;
- for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
+ while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
V.buildTree(VL, ReductionOps);
if (V.shouldReorder()) {
@@ -4270,7 +4270,7 @@ public:
V.buildTree(Reversed, ReductionOps);
}
if (V.isTreeTinyAndNotFullyVectorizable())
- continue;
+ break;
V.computeMinimumValueSizes();
@@ -4296,6 +4296,8 @@ public:
ReducedSubTree, "bin.rdx");
} else
VectorizedTree = ReducedSubTree;
+ i += ReduxWidth;
+ ReduxWidth = PowerOf2Floor(NumReducedVals - i);
}
if (VectorizedTree) {
OpenPOWER on IntegriCloud