summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
diff options
context:
space:
mode:
authorFarhana Aleen <farhana.aleen@gmail.com>2018-07-02 17:55:31 +0000
committerFarhana Aleen <farhana.aleen@gmail.com>2018-07-02 17:55:31 +0000
commit3b416db19ba91207649430cfa4604ffd331bf86c (patch)
treef59b245e92a5ed1ddabd0ed097a1d385f76756af /llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
parent40a9f2251b75012d1d9ce8f86a834ffef9edfb0f (diff)
downloadbcm5719-llvm-3b416db19ba91207649430cfa4604ffd331bf86c.tar.gz
bcm5719-llvm-3b416db19ba91207649430cfa4604ffd331bf86c.zip
[SLP] Recognize min/max pattern using instructions producing same values.
Summary:
It is common to have the following min/max pattern during the intermediate stages of SLP, since we only optimize at the end. This patch tries to catch such patterns and allow more vectorization.

  %1 = extractelement <2 x i32> %a, i32 0
  %2 = extractelement <2 x i32> %a, i32 1
  %cond = icmp sgt i32 %1, %2
  %3 = extractelement <2 x i32> %a, i32 0
  %4 = extractelement <2 x i32> %a, i32 1
  %select = select i1 %cond, i32 %3, i32 %4

Author: FarhanaAleen
Reviewed By: ABataev, RKSimon, spatel
Differential Revision: https://reviews.llvm.org/D47608
llvm-svn: 336130
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp71
1 files changed, 71 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 81878b7abb1..dd654b7b493 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5469,6 +5469,77 @@ class HorizontalReduction {
return OperationData(
Instruction::FCmp, LHS, RHS, RK_Max,
cast<Instruction>(Select->getCondition())->hasNoNaNs());
+ } else {
+ // Try harder: look for min/max pattern based on instructions producing
+ // same values such as: select ((cmp Inst1, Inst2), Inst1, Inst2).
+ // During the intermediate stages of SLP, it's very common to have
+ // pattern like this (since optimizeGatherSequence is run only once
+ // at the end):
+ // %1 = extractelement <2 x i32> %a, i32 0
+ // %2 = extractelement <2 x i32> %a, i32 1
+ // %cond = icmp sgt i32 %1, %2
+ // %3 = extractelement <2 x i32> %a, i32 0
+ // %4 = extractelement <2 x i32> %a, i32 1
+ // %select = select i1 %cond, i32 %3, i32 %4
+ CmpInst::Predicate Pred;
+ Instruction *L1;
+ Instruction *L2;
+
+ LHS = Select->getTrueValue();
+ RHS = Select->getFalseValue();
+ Value *Cond = Select->getCondition();
+
+ // TODO: Support inverse predicates.
+ if (match(Cond, m_Cmp(Pred, m_Specific(LHS), m_Instruction(L2)))) {
+ if (!isa<ExtractElementInst>(RHS) ||
+ !L2->isIdenticalTo(cast<Instruction>(RHS)))
+ return OperationData(V);
+ } else if (match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Specific(RHS)))) {
+ if (!isa<ExtractElementInst>(LHS) ||
+ !L1->isIdenticalTo(cast<Instruction>(LHS)))
+ return OperationData(V);
+ } else {
+ if (!isa<ExtractElementInst>(LHS) || !isa<ExtractElementInst>(RHS))
+ return OperationData(V);
+ if (!match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2))) ||
+ !L1->isIdenticalTo(cast<Instruction>(LHS)) ||
+ !L2->isIdenticalTo(cast<Instruction>(RHS)))
+ return OperationData(V);
+ }
+ switch (Pred) {
+ default:
+ return OperationData(V);
+
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ return OperationData(Instruction::ICmp, LHS, RHS, RK_UMin);
+
+ case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SLE:
+ return OperationData(Instruction::ICmp, LHS, RHS, RK_Min);
+
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULT:
+ case CmpInst::FCMP_ULE:
+ return OperationData(Instruction::FCmp, LHS, RHS, RK_Min,
+ cast<Instruction>(Cond)->hasNoNaNs());
+
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE:
+ return OperationData(Instruction::ICmp, LHS, RHS, RK_UMax);
+
+ case CmpInst::ICMP_SGT:
+ case CmpInst::ICMP_SGE:
+ return OperationData(Instruction::ICmp, LHS, RHS, RK_Max);
+
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGT:
+ case CmpInst::FCMP_UGE:
+ return OperationData(Instruction::FCmp, LHS, RHS, RK_Max,
+ cast<Instruction>(Cond)->hasNoNaNs());
+ }
}
}
return OperationData(V);
OpenPOWER on IntegriCloud