diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2014-09-21 15:19:15 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2014-09-21 15:19:15 +0000 |
| commit | b67bd262ea7b2f9b756a7f2e3b16f6aa91f941e8 (patch) | |
| tree | 84f9bb10370b0cff1fd257baa2d2bf2c0a08a613 /llvm/lib/CodeGen | |
| parent | d649235fc3f96119d4d5ce89d91442836f863f04 (diff) | |
| download | bcm5719-llvm-b67bd262ea7b2f9b756a7f2e3b16f6aa91f941e8.tar.gz bcm5719-llvm-b67bd262ea7b2f9b756a7f2e3b16f6aa91f941e8.zip | |
Refactor reciprocal square root estimate into target-independent function; NFC.
This is purely a plumbing patch. No functional changes intended.
The ultimate goal is to allow targets other than PowerPC (certainly X86 and AArch64) to turn this:
z = y / sqrt(x)
into:
z = y * rsqrte(x)
using whatever HW magic they can use. See http://llvm.org/bugs/show_bug.cgi?id=20900 .
The first step is to add a target hook for RSQRTE, take the already target-independent code selfishly hoarded by PPC, and put it into DAGCombiner.
Next steps:
The code in DAGCombiner::BuildRSQRTE() should be refactored further; tests that exercise that logic need to be added.
Logic in PPCTargetLowering::BuildRSQRTE() should be hoisted into DAGCombiner.
X86 and AArch64 overrides for TargetLowering::BuildRSQRTE() should be added.
Differential Revision: http://reviews.llvm.org/D5425
llvm-svn: 218219
Diffstat (limited to 'llvm/lib/CodeGen')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 79 |
1 file changed, 62 insertions, 17 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index aa2f2d1f2b1..30ac63570ff 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -326,6 +326,7 @@ namespace { SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); + SDValue BuildRSQRTE(SDNode *N); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -6987,23 +6988,29 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (N0CFP && N1CFP) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); - // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. - if (N1CFP && Options.UnsafeFPMath) { - // Compute the reciprocal 1.0 / c2. - APFloat N1APF = N1CFP->getValueAPF(); - APFloat Recip(N1APF.getSemantics(), 1); // 1.0 - APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); - // Only do the transform if the reciprocal is a legal fp immediate that - // isn't too nasty (eg NaN, denormal, ...). - if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty - (!LegalOperations || - // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM - // backend)... we should handle this gracefully after Legalize. - // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || - TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || - TLI.isFPImmLegal(Recip, VT))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, - DAG.getConstantFP(Recip, VT)); + if (Options.UnsafeFPMath) { + // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. + if (N1CFP) { + // Compute the reciprocal 1.0 / c2. 
+ APFloat N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. + // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, + DAG.getConstantFP(Recip, VT)); + } + // If this FDIV is part of a reciprocal square root, it may be folded + // into a target-specific square root estimate instruction. + if (SDValue SqrtOp = BuildRSQRTE(N)) + return SqrtOp; } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) @@ -11695,6 +11702,44 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } +/// Given an ISD::FDIV node with either a direct or indirect ISD::FSQRT operand, +/// generate a DAG expression using a reciprocal square root estimate op. +SDValue DAGCombiner::BuildRSQRTE(SDNode *N) { + // Expose the DAG combiner to the target combiner implementations. 
+ TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue N1 = N->getOperand(1); + + if (N1.getOpcode() == ISD::FSQRT) { + SDValue RV = TLI.BuildRSQRTE(N1.getOperand(0), DCI); + if (RV.getNode()) { + DCI.AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); + } + } else if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == ISD::FSQRT) { + SDValue RV = TLI.BuildRSQRTE(N1.getOperand(0).getOperand(0), DCI); + if (RV.getNode()) { + DCI.AddToWorklist(RV.getNode()); + RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); + DCI.AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); + } + } else if (N1.getOpcode() == ISD::FP_ROUND && + N1.getOperand(0).getOpcode() == ISD::FSQRT) { + SDValue RV = TLI.BuildRSQRTE(N1.getOperand(0).getOperand(0), DCI); + if (RV.getNode()) { + DCI.AddToWorklist(RV.getNode()); + RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); + DCI.AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); + } + } + + return SDValue(); +} + /// Return true if base is a frame index, which is known not to alias with /// anything but itself. Provides base object and offset as results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, |

