diff options
author | David Bolvansky <david.bolvansky@gmail.com> | 2019-08-27 10:22:40 +0000 |
---|---|---|
committer | David Bolvansky <david.bolvansky@gmail.com> | 2019-08-27 10:22:40 +0000 |
commit | 0c2692108c458c80f0ed4a336695dc6293d8508b (patch) | |
tree | 444389c4ccc1824127720eae5f3322f038c6c657 /llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | |
parent | a7f226f9dba3472173a1ea261a3f79114caf936c (diff) | |
download | bcm5719-llvm-0c2692108c458c80f0ed4a336695dc6293d8508b.tar.gz bcm5719-llvm-0c2692108c458c80f0ed4a336695dc6293d8508b.zip |
[InstCombine] Fold select with ctlz to cttz
Summary:
Handle pattern [0]:
int ctz(unsigned int a)
{
  int c = __clz(a & -a);
  return a ? 31 - c : c;
}
In reality, the compiler can generate much better code for cttz, so fold away this pattern.
https://godbolt.org/z/c5kPtV
[0] https://community.arm.com/community-help/f/discussions/2114/count-trailing-zeros
Reviewers: spatel, nikic, lebedev.ri, dmgreen, hfinkel
Reviewed By: hfinkel
Subscribers: hfinkel, javed.absar, kristof.beyls, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D66308
llvm-svn: 370037
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 50f6b2bdfd8..e7e6969f4a2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -785,6 +785,41 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
   return nullptr;
 }
 
+/// Fold the following code sequence:
+/// \code
+///   int a = ctlz(x & -x);
+//    x ? 31 - a : a;
+/// \code
+///
+/// into:
+///   cttz(x)
+static Instruction *foldSelectCtlzToCttz(ICmpInst *ICI, Value *TrueVal,
+                                         Value *FalseVal,
+                                         InstCombiner::BuilderTy &Builder) {
+  unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits();
+  if (!ICI->isEquality() || !match(ICI->getOperand(1), m_Zero()))
+    return nullptr;
+
+  if (ICI->getPredicate() == ICmpInst::ICMP_NE)
+    std::swap(TrueVal, FalseVal);
+
+  if (!match(FalseVal,
+             m_Xor(m_Deferred(TrueVal), m_SpecificInt(BitWidth - 1))))
+    return nullptr;
+
+  if (!match(TrueVal, m_Intrinsic<Intrinsic::ctlz>()))
+    return nullptr;
+
+  Value *X = ICI->getOperand(0);
+  auto *II = cast<IntrinsicInst>(TrueVal);
+  if (!match(II->getOperand(0), m_c_And(m_Specific(X), m_Neg(m_Specific(X)))))
+    return nullptr;
+
+  Function *F = Intrinsic::getDeclaration(II->getModule(), Intrinsic::cttz,
+                                          II->getType());
+  return CallInst::Create(F, {X, II->getArgOperand(1)});
+}
+
 /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single
 /// call to cttz/ctlz with flag 'is_zero_undef' cleared.
 ///
@@ -1432,6 +1467,9 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
           foldSelectICmpAndAnd(SI.getType(), ICI, TrueVal, FalseVal, Builder))
     return V;
 
+  if (Instruction *V = foldSelectCtlzToCttz(ICI, TrueVal, FalseVal, Builder))
+    return V;
+
   if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder))
     return replaceInstUsesWith(SI, V);
 