| author | Roman Lebedev <lebedev.ri@gmail.com> | 2018-07-09 19:06:42 +0000 |
|---|---|---|
| committer | Roman Lebedev <lebedev.ri@gmail.com> | 2018-07-09 19:06:42 +0000 |
| commit | 5ccae1750b26871670015755f2d2f1acd4860cb7 (patch) | |
| tree | 269c2d892dcba4907b3862e286146bf4d96ed295 /llvm/lib | |
| parent | 22a0c8dbc8bc008928b319d44ad296738940fe72 (diff) | |
| download | bcm5719-llvm-5ccae1750b26871670015755f2d2f1acd4860cb7.tar.gz bcm5719-llvm-5ccae1750b26871670015755f2d2f1acd4860cb7.zip | |
[X86][TLI] DAGCombine: Unfold variable bit-clearing mask to two shifts.
Summary:
This adds a reverse transform for the instcombine canonicalizations
that were added in D47980, D47981.
As discussed afterwards, that canonicalization turned out to be worse,
at least for code size, and potentially for performance as well.
https://rise4fun.com/Alive/Zmpl
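
For readers without the earlier reviews at hand, here is a minimal standalone C++ illustration (not part of the patch; the function names are made up) of the two equivalent ways to clear the bits below a variable index: the masked form that instcombine canonicalizes to, and the two-shift form this combine recovers.

```cpp
// Minimal sketch (not part of the patch): the two equivalent forms of
// clearing the Y lowest bits of X that this DAGCombine switches between.
#include <cassert>
#include <cstdint>

// Masked form, matching the instcombine canonicalization: x & (-1 << y).
uint32_t clearLowBitsWithMask(uint32_t X, unsigned Y) {
  return X & (UINT32_MAX << Y);
}

// Two-shift form produced by the unfold: (x >> y) << y.
uint32_t clearLowBitsWithShifts(uint32_t X, unsigned Y) {
  return (X >> Y) << Y;
}

int main() {
  // The mirrored pattern x & (-1 >> y) <-> (x << y) >> y clears the high
  // bits instead; only the low-bit case is exercised here.
  for (unsigned Y = 0; Y < 32; ++Y)
    assert(clearLowBitsWithMask(0xDEADBEEFu, Y) ==
           clearLowBitsWithShifts(0xDEADBEEFu, Y));
  return 0;
}
```

The same identity holds with the shift directions swapped for the high-bit case, which is why the matcher in the diff below accepts either SHL or SRL as the "outer" shift.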
Reviewers: craig.topper, RKSimon, spatel
Reviewed By: spatel
Subscribers: reames, llvm-commits
Differential Revision: https://reviews.llvm.org/D48768
llvm-svn: 336585
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 58 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 14 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 2 |
3 files changed, 74 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 42bc330c641..34be54db23b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -409,6 +409,7 @@ namespace {
     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                               const SDLoc &DL);
     SDValue unfoldMaskedMerge(SDNode *N);
+    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                           const SDLoc &DL, bool foldBooleans);
     SDValue rebuildSetCC(SDValue N);
@@ -4169,6 +4170,60 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
   return false;
 }
 
+// Unfold
+//    x &  (-1 'logical shift' y)
+// To
+//    (x 'opposite logical shift' y) 'logical shift' y
+// if it is better for performance.
+SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
+  assert(N->getOpcode() == ISD::AND);
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Do we actually prefer shifts over mask?
+  if (!TLI.preferShiftsToClearExtremeBits(N0))
+    return SDValue();
+
+  // Try to match  (-1 '[outer] logical shift' y)
+  unsigned OuterShift;
+  unsigned InnerShift; // The opposite direction to the OuterShift.
+  SDValue Y;           // Shift amount.
+  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
+    if (!M.hasOneUse())
+      return false;
+    OuterShift = M->getOpcode();
+    if (OuterShift == ISD::SHL)
+      InnerShift = ISD::SRL;
+    else if (OuterShift == ISD::SRL)
+      InnerShift = ISD::SHL;
+    else
+      return false;
+    if (!isAllOnesConstant(M->getOperand(0)))
+      return false;
+    Y = M->getOperand(1);
+    return true;
+  };
+
+  SDValue X;
+  if (matchMask(N1))
+    X = N0;
+  else if (matchMask(N0))
+    X = N1;
+  else
+    return SDValue();
+
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  //     tmp = x 'opposite logical shift' y
+  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
+  //     ret = tmp 'logical shift' y
+  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
+
+  return T1;
+}
+
 SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -4466,6 +4521,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       return BSwap;
   }
 
+  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
+    return Shifts;
+
   return SDValue();
 }
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 321fb950310..b0efc4d1ed5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4786,6 +4786,20 @@ bool X86TargetLowering::hasAndNot(SDValue Y) const {
   return Subtarget.hasSSE2();
 }
 
+bool X86TargetLowering::preferShiftsToClearExtremeBits(SDValue Y) const {
+  EVT VT = Y.getValueType();
+
+  // For vectors, we don't have a preference, but we probably want a mask.
+  if (VT.isVector())
+    return false;
+
+  // 64-bit shifts on 32-bit targets produce really bad bloated code.
+  if (VT == MVT::i64 && !Subtarget.is64Bit())
+    return false;
+
+  return true;
+}
+
 MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
   MVT VT = MVT::getIntegerVT(NumBits);
   if (isTypeLegal(VT))
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index d7e33442181..4fadf0543c6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -828,6 +828,8 @@ namespace llvm {
 
     bool hasAndNot(SDValue Y) const override;
 
+    bool preferShiftsToClearExtremeBits(SDValue Y) const override;
+
     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
       return VT.isScalarInteger();
     }
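
Other backends can opt into the same unfold by overriding the new TargetLowering hook. The sketch below is hypothetical ("MyTargetLowering" is not a real LLVM class, and the policy shown is an assumption, not taken from any in-tree target); it only illustrates the shape of such an override.

```cpp
// Hypothetical override of the hook added by this patch, for an imaginary
// "MyTargetLowering" backend.
bool MyTargetLowering::preferShiftsToClearExtremeBits(SDValue Y) const {
  EVT VT = Y.getValueType();

  // Keep the mask form for vectors, as the X86 implementation above does.
  if (VT.isVector())
    return false;

  // Only prefer the two-shift form for scalar types the target can shift
  // natively; expanded multi-instruction shifts would likely be worse.
  return isTypeLegal(VT);
}
```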

