summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2018-10-23 15:13:09 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2018-10-23 15:13:09 +0000
commit: 532a0f122e3a20415704fe3899351332795238e6 (patch)
tree: 55fbc538379a7089a82c7a3bcf74f7f1bfb87620 /llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
parent: 747feb28e469a472e6ae5472385fac024ae1b632 (diff)
downloadbcm5719-llvm-532a0f122e3a20415704fe3899351332795238e6.tar.gz
bcm5719-llvm-532a0f122e3a20415704fe3899351332795238e6.zip
[SLPVectorizer] Add basic support for mul/and/or/xor horizontal reductions
Expand arithmetic reduction to include mul/and/or/xor instructions.

This patch only fixes the SLPVectorizer itself: the effective reduction costs for AVX1+ are still poor (see rL344846) and will need to be improved before SLP sees this as a valid transform, but we can already see the effect on SSE2 tests.

This partially helps PR37731, but doesn't fix it completely, as it still falls over on the extraction/reduction order for some reason.

Differential Revision: https://reviews.llvm.org/D53473

llvm-svn: 345037
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 7
1 file changed, 5 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5fdbf219009..3592df3ede3 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5126,9 +5126,12 @@ class HorizontalReduction {
/// Checks if the reduction operation can be vectorized.
bool isVectorizable() const {
return LHS && RHS &&
- // We currently only support adds && min/max reductions.
+ // We currently only support add/mul/logical && min/max reductions.
((Kind == RK_Arithmetic &&
- (Opcode == Instruction::Add || Opcode == Instruction::FAdd)) ||
+ (Opcode == Instruction::Add || Opcode == Instruction::FAdd ||
+ Opcode == Instruction::Mul || Opcode == Instruction::FMul ||
+ Opcode == Instruction::And || Opcode == Instruction::Or ||
+ Opcode == Instruction::Xor)) ||
((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
(Kind == RK_Min || Kind == RK_Max)) ||
(Opcode == Instruction::ICmp &&
OpenPOWER on IntegriCloud