diff options
author | Roman Lebedev <lebedev.ri@gmail.com> | 2019-08-29 10:26:23 +0000 |
---|---|---|
committer | Roman Lebedev <lebedev.ri@gmail.com> | 2019-08-29 10:26:23 +0000 |
commit | f13b0e3ed89f4247c0dfa3147dc3a18d10c96ee8 (patch) | |
tree | ae05376466f10cfb5faa01e3f08c58cbc241f1e8 /llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | |
parent | c65204148c13d6c2cdfa018b2dd2bf8c306cc7a5 (diff) | |
download | bcm5719-llvm-f13b0e3ed89f4247c0dfa3147dc3a18d10c96ee8.tar.gz bcm5719-llvm-f13b0e3ed89f4247c0dfa3147dc3a18d10c96ee8.zip |
[InstCombine] Shift amount reassociation in bittest: trunc-of-lshr (PR42399)
Summary:
Finally, the fold i was looking forward to :)
The legality check is muddy, i doubt i've groked the full generalization,
but it handles all the cases i care about, and can come up with:
https://rise4fun.com/Alive/26j
I.e. we can perform the fold if **any** of the following is true:
* The shift amount is either zero or one less than widest bitwidth
* Either of the values being shifted has at most lowest bit set
* The value that is being shifted by `shl` (which is not truncated) should have no less leading zeros than the total shift amount;
* The value that is being shifted by `lshr` (which **is** truncated) should have no less leading zeros than the widest bit width minus total shift amount minus one
I strongly suspect there is some better generalization, but i'm not aware of it as of right now.
For now i also avoided using actual `computeKnownBits()`, but restricted it to constants.
Reviewers: spatel, nikic, xbolva00
Reviewed By: spatel
Subscribers: hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D66383
llvm-svn: 370324
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 72 |
1 files changed, 58 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 940e5554bb3..1117586549e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3379,7 +3379,7 @@ foldICmpWithTruncSignExtendedVal(ICmpInst &I, // we should move shifts to the same hand of 'and', i.e. rewrite as // icmp eq/ne (and (x shift (Q+K)), y), 0 iff (Q+K) u< bitwidth(x) // We are only interested in opposite logical shifts here. -// One of the shifts can be truncated. For now, it can only be 'shl'. +// One of the shifts can be truncated. // If we can, we want to end up creating 'lshr' shift. static Value * foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ, @@ -3413,14 +3413,6 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ, "We did not look past any shifts while matching XShift though."); bool HadTrunc = WidestTy != I.getOperand(0)->getType(); - if (HadTrunc) { - // We did indeed have a truncation. For now, let's only proceed if the 'shl' - // was truncated, since that does not require any extra legality checks. - // FIXME: trunc-of-lshr. - if (!match(YShift, m_Shl(m_Value(), m_Value()))) - return nullptr; - } - // If YShift is a 'lshr', swap the shifts around. if (match(YShift, m_LShr(m_Value(), m_Value()))) std::swap(XShift, YShift); @@ -3462,16 +3454,68 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ, /*isNUW=*/false, SQ.getWithInstruction(&I))); if (!NewShAmt) return nullptr; + NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy); + unsigned WidestBitWidth = WidestTy->getScalarSizeInBits(); + // Is the new shift amount smaller than the bit width? // FIXME: could also rely on ConstantRange. - if (!match(NewShAmt, m_SpecificInt_ICMP( - ICmpInst::Predicate::ICMP_ULT, - APInt(NewShAmt->getType()->getScalarSizeInBits(), - WidestTy->getScalarSizeInBits())))) + if (!match(NewShAmt, + m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT, + APInt(WidestBitWidth, WidestBitWidth)))) return nullptr; + + // An extra legality check is needed if we had trunc-of-lshr. + if (HadTrunc && match(WidestShift, m_LShr(m_Value(), m_Value()))) { + auto CanFold = [NewShAmt, WidestBitWidth, NarrowestShift, SQ, + WidestShift]() { + // It isn't obvious whether it's worth it to analyze non-constants here. + // Also, let's basically give up on non-splat cases, pessimizing vectors. + // If *any* of these preconditions matches we can perform the fold. + Constant *NewShAmtSplat = NewShAmt->getType()->isVectorTy() + ? NewShAmt->getSplatValue() + : NewShAmt; + // If it's edge-case shift (by 0 or by WidestBitWidth-1) we can fold. + if (NewShAmtSplat && + (NewShAmtSplat->isNullValue() || + NewShAmtSplat->getUniqueInteger() == WidestBitWidth - 1)) + return true; + // We consider *min* leading zeros so a single outlier + // blocks the transform as opposed to allowing it. + if (auto *C = dyn_cast<Constant>(NarrowestShift->getOperand(0))) { + KnownBits Known = computeKnownBits(C, SQ.DL); + unsigned MinLeadZero = Known.countMinLeadingZeros(); + // If the value being shifted has at most lowest bit set we can fold. + unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero; + if (MaxActiveBits <= 1) + return true; + // Precondition: NewShAmt u<= countLeadingZeros(C) + if (NewShAmtSplat && NewShAmtSplat->getUniqueInteger().ule(MinLeadZero)) + return true; + } + if (auto *C = dyn_cast<Constant>(WidestShift->getOperand(0))) { + KnownBits Known = computeKnownBits(C, SQ.DL); + unsigned MinLeadZero = Known.countMinLeadingZeros(); + // If the value being shifted has at most lowest bit set we can fold. + unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero; + if (MaxActiveBits <= 1) + return true; + // Precondition: ((WidestBitWidth-1)-NewShAmt) u<= countLeadingZeros(C) + if (NewShAmtSplat) { + APInt AdjNewShAmt = + (WidestBitWidth - 1) - NewShAmtSplat->getUniqueInteger(); + if (AdjNewShAmt.ule(MinLeadZero)) + return true; + } + } + return false; // Can't tell if it's ok. + }; + if (!CanFold()) + return nullptr; + } + // All good, we can do this fold. - NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy); X = Builder.CreateZExt(X, WidestTy); + Y = Builder.CreateZExt(Y, WidestTy); // The shift is the same that was for X. Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr ? Builder.CreateLShr(X, NewShAmt) |