diff options
| author | Craig Topper <craig.topper@intel.com> | 2019-09-05 18:15:07 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-09-05 18:15:07 +0000 |
| commit | b8d6ba3ca203847be7cb3e4e2e352cf6c5502ca4 (patch) | |
| tree | f1e31e2947b19ab9b1d8e588e594f42370b3e411 /llvm/lib | |
| parent | 00e04b0a6d51a415ea70133bbc2c6dad9cc72ecc (diff) | |
| download | bcm5719-llvm-b8d6ba3ca203847be7cb3e4e2e352cf6c5502ca4.tar.gz bcm5719-llvm-b8d6ba3ca203847be7cb3e4e2e352cf6c5502ca4.zip | |
[X86] Override BuildSDIVPow2 for X86.
As noted in PR43197, we can use test+add+cmov+sra to implement
signed division by a power of 2.
This is based off the similar version in AArch64, but I've
adjusted it to use target independent nodes where AArch64 uses
target specific CMP and CSEL nodes. I've also blocked INT_MIN
as the transform isn't valid for that.
I've limited this to i32 and i64 on 64-bit targets for now and only
when CMOV is supported. i8 and i16 need further investigation to be
sure they get promoted to i32 well.
I adjusted a few tests to enable cmov to demonstrate the new
codegen. I also changed twoaddr-coalesce-3.ll to 32-bit mode
without cmov to avoid perturbing the scenario that is being
set up there.
Differential Revision: https://reviews.llvm.org/D67087
llvm-svn: 371104
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 55 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 3 |
2 files changed, 58 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f4a1e33030b..d7efb0a126d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20080,6 +20080,61 @@ unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
   return 2;
 }
 
+SDValue
+X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                 SelectionDAG &DAG,
+                                 SmallVectorImpl<SDNode *> &Created) const {
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (isIntDivCheap(N->getValueType(0), Attr))
+    return SDValue(N,0); // Lower SDIV as SDIV
+
+  assert((Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) &&
+         "Unexpected divisor!");
+
+  // Only perform this transform if CMOV is supported otherwise the select
+  // below will become a branch.
+  if (!Subtarget.hasCMov())
+    return SDValue();
+
+  // fold (sdiv X, pow2)
+  EVT VT = N->getValueType(0);
+  // FIXME: Support i8/i16.
+  if ((VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64)))
+    return SDValue();
+
+  unsigned Lg2 = Divisor.countTrailingZeros();
+
+  // If the divisor is 2 or -2, the default expansion is better.
+  if (Lg2 == 1)
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
+
+  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+  SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+  Created.push_back(Cmp.getNode());
+  Created.push_back(Add.getNode());
+  Created.push_back(CMov.getNode());
+
+  // Divide by pow2.
+  SDValue SRA =
+      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i64));
+
+  // If we're dividing by a positive value, we're done. Otherwise, we must
+  // negate the result.
+  if (Divisor.isNonNegative())
+    return SRA;
+
+  Created.push_back(SRA.getNode());
+  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+}
+
 /// Result of 'and' is compared against zero. Change to a BT node if possible.
 /// Returns the BT node and the condition code needed to use it.
 static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 05e4f16fc49..fbc9509f058 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1478,6 +1478,9 @@ namespace llvm {
 
     /// Reassociate floating point divisions into multiply by reciprocal.
     unsigned combineRepeatedFPDivisors() const override;
+
+    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                          SmallVectorImpl<SDNode *> &Created) const override;
   };
 
   namespace X86 {

