diff options
| author | Amaury Sechet <deadalnix@gmail.com> | 2019-08-22 15:35:45 +0000 | 
|---|---|---|
| committer | Amaury Sechet <deadalnix@gmail.com> | 2019-08-22 15:35:45 +0000 | 
| commit | 95cf66de7cc186253c59a009bb795da1cbe9d6f4 (patch) | |
| tree | 300a9ca9cb270809280cd6e3d1f3be627be78a81 /llvm | |
| parent | c9649eb9dab747c3b5c1d2b8ab6d54145fce40b2 (diff) | |
| download | bcm5719-llvm-95cf66de7cc186253c59a009bb795da1cbe9d6f4.tar.gz bcm5719-llvm-95cf66de7cc186253c59a009bb795da1cbe9d6f4.zip  | |
[DAGCombiner] Remove explicit call to AddToWorklist in sqrt and reciprocal computations
Summary: These nodes end up being processed regardless due to DAGCombiner ensuring arguments are processed. This changes the order in which nodes are processed, which fixes an issue on PowerPC.
Reviewers: craig.topper, efriedma, RKSimon, lebedev.ri, mcberg2017, stefanp, hfinkel
Subscribers: nemanjai, MaskRay, jsji, steven.zhang, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D66548
llvm-svn: 369662
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-recipest.ll | 33 | 
2 files changed, 10 insertions, 56 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 30336353189..081544818f6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -20084,16 +20084,9 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {        // Newton iterations: Est = Est + Est (1 - Arg * Est)        for (int i = 0; i < Iterations; ++i) {          SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); -        AddToWorklist(NewEst.getNode()); -          NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); -        AddToWorklist(NewEst.getNode()); -          NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); -        AddToWorklist(NewEst.getNode()); -          Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); -        AddToWorklist(Est.getNode());        }      }      return Est; @@ -20118,31 +20111,19 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,    // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that    // this entire sequence requires only one FP constant.    SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); -  AddToWorklist(HalfArg.getNode()); -    HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); -  AddToWorklist(HalfArg.getNode());    // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)    for (unsigned i = 0; i < Iterations; ++i) {      SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); -    AddToWorklist(NewEst.getNode()); -      NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); -    AddToWorklist(NewEst.getNode()); -      NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); -    AddToWorklist(NewEst.getNode()); -      Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); -    AddToWorklist(Est.getNode());    }    // If non-reciprocal square root is requested, multiply the result by Arg. -  if (!Reciprocal) { +  if (!Reciprocal)      Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); -    AddToWorklist(Est.getNode()); -  }    return Est;  } @@ -20168,13 +20149,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,    // E = (E * -0.5) * ((A * E) * E + -3.0)    for (unsigned i = 0; i < Iterations; ++i) {      SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); -    AddToWorklist(AE.getNode()); -      SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); -    AddToWorklist(AEE.getNode()); -      SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); -    AddToWorklist(RHS.getNode());      // When calculating a square root at the last iteration build:      // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -20187,10 +20163,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,        // SQRT: LHS = (A * E) * -0.5        LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);      } -    AddToWorklist(LHS.getNode());      Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); -    AddToWorklist(Est.getNode());    }    return Est; @@ -20247,16 +20221,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,            SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);            SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);            Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); -          AddToWorklist(Fabs.getNode()); -          AddToWorklist(IsDenorm.getNode()); -          AddToWorklist(Est.getNode());          } else {            // X == 0.0 ? 0.0 : Est            SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);            SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);            Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); -          AddToWorklist(IsZero.getNode()); -          AddToWorklist(Est.getNode());          }        }      } diff --git a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll index 6c57aef370b..f841cb1c14c 100644 --- a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll +++ b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll @@ -57,8 +57,6 @@ entry:    ret <4 x double> %r  } -; FIXME: We're currently loading two constants here (1.5 and -1.5), and using -;        an qvfmadd instead of a qvfnmsubs  define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind {  ; CHECK-LABEL: foof_fmf:  ; CHECK:       # %bb.0: # %entry @@ -66,12 +64,9 @@ define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind {  ; CHECK-NEXT:    qvfrsqrtes 3, 2  ; CHECK-NEXT:    addi 3, 3, .LCPI2_0@toc@l  ; CHECK-NEXT:    qvlfsx 0, 0, 3 -; CHECK-NEXT:    addis 3, 2, .LCPI2_1@toc@ha -; CHECK-NEXT:    addi 3, 3, .LCPI2_1@toc@l -; CHECK-NEXT:    qvlfsx 4, 0, 3 -; CHECK-NEXT:    qvfmadds 0, 2, 0, 2 -; CHECK-NEXT:    qvfmuls 2, 3, 3 -; CHECK-NEXT:    qvfmadds 0, 0, 2, 4 +; CHECK-NEXT:    qvfmuls 4, 3, 3 +; CHECK-NEXT:    qvfnmsubs 2, 2, 0, 2 +; CHECK-NEXT:    qvfmadds 0, 2, 4, 0  ; CHECK-NEXT:    qvfmuls 0, 3, 0  ; CHECK-NEXT:    qvfmul 1, 1, 0  ; CHECK-NEXT:    blr @@ -179,8 +174,6 @@ entry:    ret <4 x float> %r  } -; FIXME: We're currently loading two constants here (1.5 and -1.5), and using -;        an qvfmadd instead of a qvfnmsubs  define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind {  ; CHECK-LABEL: goo_fmf:  ; CHECK:       # %bb.0: # %entry @@ -188,12 +181,9 @@ define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind {  ; CHECK-NEXT:    qvfrsqrtes 3, 2  ; CHECK-NEXT:    addi 3, 3, .LCPI6_0@toc@l  ; CHECK-NEXT:    qvlfsx 0, 0, 3 -; CHECK-NEXT:    addis 3, 2, .LCPI6_1@toc@ha -; CHECK-NEXT:    addi 3, 3, .LCPI6_1@toc@l -; CHECK-NEXT:    qvlfsx 4, 0, 3 -; CHECK-NEXT:    qvfmadds 0, 2, 0, 2 -; CHECK-NEXT:    qvfmuls 2, 3, 3 -; CHECK-NEXT:    qvfmadds 0, 0, 2, 4 +; CHECK-NEXT:    qvfmuls 4, 3, 3 +; CHECK-NEXT:    qvfnmsubs 2, 2, 0, 2 +; CHECK-NEXT:    qvfmadds 0, 2, 4, 0  ; CHECK-NEXT:    qvfmuls 0, 3, 0  ; CHECK-NEXT:    qvfmuls 1, 1, 0  ; CHECK-NEXT:    blr @@ -360,8 +350,6 @@ entry:    ret <4 x double> %r  } -; FIXME: We're currently loading two constants here (1.5 and -1.5), and using -;        an qvfmadds instead of a qvfnmsubs  define <4 x float> @goo3_fmf(<4 x float> %a) nounwind {  ; CHECK-LABEL: goo3_fmf:  ; CHECK:       # %bb.0: # %entry @@ -369,14 +357,11 @@ define <4 x float> @goo3_fmf(<4 x float> %a) nounwind {  ; CHECK-NEXT:    qvfrsqrtes 2, 1  ; CHECK-NEXT:    addi 3, 3, .LCPI14_1@toc@l  ; CHECK-NEXT:    qvlfsx 0, 0, 3 -; CHECK-NEXT:    addis 3, 2, .LCPI14_2@toc@ha -; CHECK-NEXT:    addi 3, 3, .LCPI14_2@toc@l -; CHECK-NEXT:    qvlfsx 3, 0, 3  ; CHECK-NEXT:    addis 3, 2, .LCPI14_0@toc@ha -; CHECK-NEXT:    qvfmuls 4, 2, 2  ; CHECK-NEXT:    addi 3, 3, .LCPI14_0@toc@l -; CHECK-NEXT:    qvfmadds 0, 1, 0, 1 -; CHECK-NEXT:    qvfmadds 0, 0, 4, 3 +; CHECK-NEXT:    qvfmuls 4, 2, 2 +; CHECK-NEXT:    qvfnmsubs 3, 1, 0, 1 +; CHECK-NEXT:    qvfmadds 0, 3, 4, 0  ; CHECK-NEXT:    qvlfsx 3, 0, 3  ; CHECK-NEXT:    qvfmuls 0, 2, 0  ; CHECK-NEXT:    qvfmuls 0, 0, 1  | 

