summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
diff options
context:
space:
mode:
authorRoman Lebedev <lebedev.ri@gmail.com>2019-08-13 12:49:28 +0000
committerRoman Lebedev <lebedev.ri@gmail.com>2019-08-13 12:49:28 +0000
commit73f702ff192475b27039325a7428ce037771a5de (patch)
tree776623492053f71c64f8c4bae6298f5c3b575dda /llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
parent0410489a34be7f82775b3ff5bbc00df29bdf5864 (diff)
downloadbcm5719-llvm-73f702ff192475b27039325a7428ce037771a5de.tar.gz
bcm5719-llvm-73f702ff192475b27039325a7428ce037771a5de.zip
[InstCombine] Non-canonical clamp-like pattern handling
Summary: Given a pattern like: ``` %old_cmp1 = icmp slt i32 %x, C2 %old_replacement = select i1 %old_cmp1, i32 %target_low, i32 %target_high %old_x_offseted = add i32 %x, C1 %old_cmp0 = icmp ult i32 %old_x_offseted, C0 %r = select i1 %old_cmp0, i32 %x, i32 %old_replacement ``` it can be rewritten as more canonical pattern: ``` %new_cmp1 = icmp slt i32 %x, -C1 %new_cmp2 = icmp sge i32 %x, C0-C1 %new_clamped_low = select i1 %new_cmp1, i32 %target_low, i32 %x %r = select i1 %new_cmp2, i32 %target_high, i32 %new_clamped_low ``` Iff `-C1 s<= C2 s<= C0-C1` Also, `ULT` predicate can also be `UGE`; or `UGT` iff `C0 != -1` (+invert result) Also, `SLT` predicate can also be `SGE`; or `SGT` iff `C2 != INT_MAX` (+invert result) If `C1 == 0`, then all 3 instructions must be one-use; else at most either `%old_cmp1` or `%old_x_offseted` can have extra uses. NOTE: if we could reuse `%old_cmp1` as one of the comparisons we'll have to build, this could be less limiting. So there are two icmp's, each one with 3 predicate variants, so there are 9 fold variants: | | ULT | UGE | UGT | | SLT | https://rise4fun.com/Alive/yIJ | https://rise4fun.com/Alive/5BfN | https://rise4fun.com/Alive/INH | | SGE | https://rise4fun.com/Alive/hd8 | https://rise4fun.com/Alive/Abk | https://rise4fun.com/Alive/PlzS | | SGT | https://rise4fun.com/Alive/VYG | https://rise4fun.com/Alive/oMY | https://rise4fun.com/Alive/KrzC | {F9730206} This fold was brought up in https://reviews.llvm.org/D65148#1603922 by @dmgreen, and is needed to unblock that patch. This patch requires D65530. Reviewers: spatel, nikic, xbolva00, dmgreen Reviewed By: spatel Subscribers: hiraditya, llvm-commits, dmgreen Tags: #llvm Differential Revision: https://reviews.llvm.org/D65765 llvm-svn: 368687
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp146
1 files changed, 146 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8950d92fc6c..45c0f4ef03e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1118,6 +1118,149 @@ static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp,
return nullptr;
}
+// See if this is a pattern like:
+// %old_cmp1 = icmp slt i32 %x, C2
+// %old_replacement = select i1 %old_cmp1, i32 %target_low, i32 %target_high
+// %old_x_offseted = add i32 %x, C1
+// %old_cmp0 = icmp ult i32 %old_x_offseted, C0
+// %r = select i1 %old_cmp0, i32 %x, i32 %old_replacement
+// This can be rewritten as more canonical pattern:
+// %new_cmp1 = icmp slt i32 %x, -C1
+// %new_cmp2 = icmp sge i32 %x, C0-C1
+// %new_clamped_low = select i1 %new_cmp1, i32 %target_low, i32 %x
+// %r = select i1 %new_cmp2, i32 %target_high, i32 %new_clamped_low
+// Iff -C1 s<= C2 s<= C0-C1
+// Also ULT predicate can also be UGT iff C0 != -1 (+invert result)
+// SLT predicate can also be SGT iff C2 != INT_MAX (+invert res.)
+static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
+ InstCombiner::BuilderTy &Builder) {
+ Value *X = Sel0.getTrueValue();
+ Value *Sel1 = Sel0.getFalseValue();
+
+ // First match the condition of the outermost select.
+ // Said condition must be one-use.
+ if (!Cmp0.hasOneUse())
+ return nullptr;
+ Value *Cmp00 = Cmp0.getOperand(0);
+ Constant *C0;
+ if (!match(Cmp0.getOperand(1),
+ m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0))))
+ return nullptr;
+ // Canonicalize Cmp0 into the form we expect.
+ // FIXME: we shouldn't care about lanes that are 'undef' in the end?
+ switch (Cmp0.getPredicate()) {
+ case ICmpInst::Predicate::ICMP_ULT:
+ break; // Great!
+ case ICmpInst::Predicate::ICMP_ULE:
+ // We'd have to increment C0 by one, and for that it must not have all-ones
+ // element, but then it would have been canonicalized to 'ult' before
+ // we get here. So we can't do anything useful with 'ule'.
+ return nullptr;
+ case ICmpInst::Predicate::ICMP_UGT:
+ // We want to canonicalize it to 'ult', so we'll need to increment C0,
+ // which again means it must not have any all-ones elements.
+ if (!match(C0,
+ m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE,
+ APInt::getAllOnesValue(
+ C0->getType()->getScalarSizeInBits()))))
+ return nullptr; // Can't do, have all-ones element[s].
+ C0 = AddOne(C0);
+ std::swap(X, Sel1);
+ break;
+ case ICmpInst::Predicate::ICMP_UGE:
+ // The only way we'd get this predicate if this `icmp` has extra uses,
+ // but then we won't be able to do this fold.
+ return nullptr;
+ default:
+ return nullptr; // Unknown predicate.
+ }
+
+ // Now that we've canonicalized the ICmp, we know the X we expect;
+ // the select in other hand should be one-use.
+ if (!Sel1->hasOneUse())
+ return nullptr;
+
+ // We now can finish matching the condition of the outermost select:
+ // it should either be the X itself, or an addition of some constant to X.
+ Constant *C1;
+ if (Cmp00 == X)
+ C1 = ConstantInt::getNullValue(Sel0.getType());
+ else if (!match(Cmp00,
+ m_Add(m_Specific(X),
+ m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C1)))))
+ return nullptr;
+
+ Value *Cmp1;
+ ICmpInst::Predicate Pred1;
+ Constant *C2;
+ Value *ReplacementLow, *ReplacementHigh;
+ if (!match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
+ m_Value(ReplacementHigh))) ||
+ !match(Cmp1,
+ m_ICmp(Pred1, m_Specific(X),
+ m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C2)))))
+ return nullptr;
+
+ if (!Cmp1->hasOneUse() && (Cmp00 == X || !Cmp00->hasOneUse()))
+ return nullptr; // Not enough one-use instructions for the fold.
+ // FIXME: this restriction could be relaxed if Cmp1 can be reused as one of
+ // two comparisons we'll need to build.
+
+ // Canonicalize Cmp1 into the form we expect.
+ // FIXME: we shouldn't care about lanes that are 'undef' in the end?
+ switch (Pred1) {
+ case ICmpInst::Predicate::ICMP_SLT:
+ break;
+ case ICmpInst::Predicate::ICMP_SLE:
+ // We'd have to increment C2 by one, and for that it must not have signed
+ // max element, but then it would have been canonicalized to 'slt' before
+ // we get here. So we can't do anything useful with 'sle'.
+ return nullptr;
+ case ICmpInst::Predicate::ICMP_SGT:
+ // We want to canonicalize it to 'slt', so we'll need to increment C2,
+ // which again means it must not have any signed max elements.
+ if (!match(C2,
+ m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE,
+ APInt::getSignedMaxValue(
+ C2->getType()->getScalarSizeInBits()))))
+ return nullptr; // Can't do, have signed max element[s].
+ C2 = AddOne(C2);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::Predicate::ICMP_SGE:
+ // Also non-canonical, but here we don't need to change C2,
+ // so we don't have any restrictions on C2, so we can just handle it.
+ std::swap(ReplacementLow, ReplacementHigh);
+ break;
+ default:
+ return nullptr; // Unknown predicate.
+ }
+
+ // The thresholds of this clamp-like pattern.
+ auto *ThresholdLowIncl = ConstantExpr::getNeg(C1);
+ auto *ThresholdHighExcl = ConstantExpr::getSub(C0, C1);
+
+ // The fold has a precondition 1: C2 s>= ThresholdLow
+ auto *Precond1 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SGE, C2,
+ ThresholdLowIncl);
+ if (!match(Precond1, m_One()))
+ return nullptr;
+ // The fold has a precondition 2: C2 s<= ThresholdHigh
+ auto *Precond2 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SLE, C2,
+ ThresholdHighExcl);
+ if (!match(Precond2, m_One()))
+ return nullptr;
+
+ // All good, finally emit the new pattern.
+ Value *ShouldReplaceLow = Builder.CreateICmpSLT(X, ThresholdLowIncl);
+ Value *ShouldReplaceHigh = Builder.CreateICmpSGE(X, ThresholdHighExcl);
+ Value *MaybeReplacedLow =
+ Builder.CreateSelect(ShouldReplaceLow, ReplacementLow, X);
+ Instruction *MaybeReplacedHigh =
+ SelectInst::Create(ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow);
+
+ return MaybeReplacedHigh;
+}
+
/// Visit a SelectInst that has an ICmpInst as its first operand.
Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
ICmpInst *ICI) {
@@ -1130,6 +1273,9 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, Builder))
return NewAbs;
+ if (Instruction *NewAbs = canonicalizeClampLike(SI, *ICI, Builder))
+ return NewAbs;
+
bool Changed = adjustMinMax(SI, *ICI);
if (Value *V = foldSelectICmpAnd(SI, ICI, Builder))
OpenPOWER on IntegriCloud