diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2014-10-24 17:02:16 +0000 | 
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2014-10-24 17:02:16 +0000 | 
| commit | 957efc23bb87d341a1b478d87a48bb888c2d4068 (patch) | |
| tree | 48ae584987b7970cb90899c03590938f4d622799 /llvm/lib/CodeGen/SelectionDAG | |
| parent | 5e3a421bfcb891fc7821daa501e30c113fb1bf16 (diff) | |
| download | bcm5719-llvm-957efc23bb87d341a1b478d87a48bb888c2d4068.tar.gz bcm5719-llvm-957efc23bb87d341a1b478d87a48bb888c2d4068.zip | |
Use rsqrt (X86) to speed up reciprocal square root calcs
This is a first step for generating SSE rsqrt instructions for
reciprocal square root calcs when fast-math is allowed.
For now, be conservative and only enable this for AMD btver2
where performance improves significantly - for example, 29%
on llvm/projects/test-suite/SingleSource/Benchmarks/BenchmarkGame/n-body.c
(if we convert the data type to single-precision float).
This patch adds a two constant version of the Newton-Raphson
refinement algorithm to DAGCombiner that can be selected by any target
via a parameter returned by getRsqrtEstimate()..
See PR20900 for more details:
http://llvm.org/bugs/show_bug.cgi?id=20900
Differential Revision: http://reviews.llvm.org/D5658
llvm-svn: 220570
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 117 | 
1 files changed, 77 insertions, 40 deletions
| diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6fe4ade4aa8..4ad2e5dd492 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -331,6 +331,8 @@ namespace {      SDValue BuildUDIV(SDNode *N);      SDValue BuildReciprocalEstimate(SDValue Op);      SDValue BuildRsqrtEstimate(SDValue Op); +    SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations); +    SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);      SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,                                 bool DemandHighBits = true);      SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -7033,13 +7035,11 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {      // into a target-specific square root estimate instruction.      if (N1.getOpcode() == ISD::FSQRT) {        if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) { -        AddToWorklist(RV.getNode());          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);        }      } else if (N1.getOpcode() == ISD::FP_EXTEND &&                 N1.getOperand(0).getOpcode() == ISD::FSQRT) {        if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { -        AddToWorklist(RV.getNode());          RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);          AddToWorklist(RV.getNode());          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); @@ -7047,7 +7047,6 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {      } else if (N1.getOpcode() == ISD::FP_ROUND &&                 N1.getOperand(0).getOpcode() == ISD::FSQRT) {        if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { -        AddToWorklist(RV.getNode());          RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));          AddToWorklist(RV.getNode());          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); @@ -7068,7 +7067,6 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {          // We found a FSQRT, so try to make this fold:          // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)          if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) { -          AddToWorklist(RV.getNode());            RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);            AddToWorklist(RV.getNode());            return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); @@ -7116,7 +7114,6 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {    if (DAG.getTarget().Options.UnsafeFPMath) {      // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)      if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { -      AddToWorklist(RV.getNode());        EVT VT = RV.getValueType();        RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);        AddToWorklist(RV.getNode()); @@ -11985,49 +11982,89 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {    return SDValue();  } -SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { -  if (Level >= AfterLegalizeDAG) -    return SDValue(); +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal sqrt, we need to find the zero of the function: +///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] +///     => +///   X_{i+1} = X_i (1.5 - A X_i^2 / 2) +/// As a result, we precompute A/2 prior to the iteration loop. +SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, +                                          unsigned Iterations) { +  EVT VT = Arg.getValueType(); +  SDLoc DL(Arg); +  SDValue ThreeHalves = DAG.getConstantFP(1.5, VT); +   +  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that +  // this entire sequence requires only one FP constant. +  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); +  AddToWorklist(HalfArg.getNode()); +   +  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); +  AddToWorklist(HalfArg.getNode()); +   +  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) +  for (unsigned i = 0; i < Iterations; ++i) { +    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); +    AddToWorklist(NewEst.getNode()); +     +    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); +    AddToWorklist(NewEst.getNode()); +     +    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); +    AddToWorklist(NewEst.getNode()); +     +    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); +    AddToWorklist(Est.getNode()); +  } +  return Est; +} -  // Expose the DAG combiner to the target combiner implementations. -  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); -  unsigned Iterations = 0; -  if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations)) { -    if (Iterations) { -      // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) -      // For the reciprocal sqrt, we need to find the zero of the function: -      //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] -      //     => -      //   X_{i+1} = X_i (1.5 - A X_i^2 / 2) -      // As a result, we precompute A/2 prior to the iteration loop. -      EVT VT = Op.getValueType(); -      SDLoc DL(Op); -      SDValue FPThreeHalves = DAG.getConstantFP(1.5, VT); +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal sqrt, we need to find the zero of the function: +///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] +///     => +///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) +SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, +                                          unsigned Iterations) { +  EVT VT = Arg.getValueType(); +  SDLoc DL(Arg); +  SDValue MinusThree = DAG.getConstantFP(-3.0, VT); +  SDValue MinusHalf = DAG.getConstantFP(-0.5, VT); -      AddToWorklist(Est.getNode()); +  // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) +  for (unsigned i = 0; i < Iterations; ++i) { +    SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); +    AddToWorklist(HalfEst.getNode()); -      // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that -      // this entire sequence requires only one FP constant. -      SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, FPThreeHalves, Op); -      AddToWorklist(HalfArg.getNode()); +    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); +    AddToWorklist(Est.getNode()); -      HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Op); -      AddToWorklist(HalfArg.getNode()); +    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); +    AddToWorklist(Est.getNode()); -      // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) -      for (unsigned i = 0; i < Iterations; ++i) { -        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); -        AddToWorklist(NewEst.getNode()); +    Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree); +    AddToWorklist(Est.getNode()); -        NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); -        AddToWorklist(NewEst.getNode()); +    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst); +    AddToWorklist(Est.getNode()); +  } +  return Est; +} -        NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPThreeHalves, NewEst); -        AddToWorklist(NewEst.getNode()); +SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { +  if (Level >= AfterLegalizeDAG) +    return SDValue(); -        Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); -        AddToWorklist(Est.getNode()); -      } +  // Expose the DAG combiner to the target combiner implementations. +  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); +  unsigned Iterations = 0; +  bool UseOneConstNR = false; +  if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { +    AddToWorklist(Est.getNode()); +    if (Iterations) { +      Est = UseOneConstNR ? +        BuildRsqrtNROneConst(Op, Est, Iterations) : +        BuildRsqrtNRTwoConst(Op, Est, Iterations);      }      return Est;    } | 

