Revert "make reciprocal estimate code generation more flexible by adding command-line options"

This reverts commit r238051. It broke some bots: http://lab.llvm.org:8011/builders/llvm-ppc64-linux1/builds/18190 llvm-svn: 238075
author: Rafael Espindola <rafael.espindola@gmail.com> 2015-05-23 00:22:44 +0000
committer: Rafael Espindola <rafael.espindola@gmail.com> 2015-05-23 00:22:44 +0000
commit: 445712264d494b1723074b9ba4be530ddc56ea1a (patch)
tree: 574d55a9e254924f743fc40bdc3e01091c9ba488 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent: 6bbe9743d1a90f1d2749654dc3bec4ebcbc9b498 (diff)
download: bcm5719-llvm-445712264d494b1723074b9ba4be530ddc56ea1a.tar.gz
bcm5719-llvm-445712264d494b1723074b9ba4be530ddc56ea1a.zip
1 files changed, 35 insertions, 33 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 609973f8971..e850e500db6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -67,6 +67,12 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization(
              "rather than promotion."),
     cl::Hidden);
 
+static cl::opt<int> ReciprocalEstimateRefinementSteps(
+    "x86-recip-refinement-steps", cl::init(1),
+    cl::desc("Specify the number of Newton-Raphson iterations applied to the "
+             "result of the hardware reciprocal estimate instruction."),
+    cl::NotHidden);
+
 // Forward declarations.
 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
                        SDValue V2);
@@ -12895,31 +12901,29 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
                                             DAGCombinerInfo &DCI,
                                             unsigned &RefinementSteps,
                                             bool &UseOneConstNR) const {
+  // FIXME: We should use instruction latency models to calculate the cost of
+  // each potential sequence, but this is very hard to do reliably because
+  // at least Intel's Core* chips have variable timing based on the number of
+  // significant digits in the divisor and/or sqrt operand.
+  if (!Subtarget->useSqrtEst())
+    return SDValue();
+
   EVT VT = Op.getValueType();
-  const char *RecipOp;
 
-  // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
+  // SSE1 has rsqrtss and rsqrtps.
   // TODO: Add support for AVX512 (v16f32).
   // It is likely not profitable to do this for f64 because a double-precision
   // rsqrt estimate with refinement on x86 prior to FMA requires at least 16
   // instructions: convert to single, rsqrtss, convert back to double, refine
   // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
   // along with FMA, this could be a throughput win.
-  if (VT == MVT::f32 && Subtarget->hasSSE1())
-    RecipOp = "sqrtf";
-  else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
-           (VT == MVT::v8f32 && Subtarget->hasAVX()))
-    RecipOp = "vec-sqrtf";
-  else
-    return SDValue();
-  
-  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
-  if (!Recips.isEnabled(RecipOp))
-    return SDValue();
-  
-  RefinementSteps = Recips.getRefinementSteps(RecipOp);
-  UseOneConstNR = false;
-  return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+  if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+      (Subtarget->hasAVX() && VT == MVT::v8f32)) {
+    RefinementSteps = 1;
+    UseOneConstNR = false;
+    return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+  }
+  return SDValue();
 }
 
 /// The minimum architected relative accuracy is 2^-12. We need one
@@ -12927,9 +12931,15 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
 SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
                                             DAGCombinerInfo &DCI,
                                             unsigned &RefinementSteps) const {
+  // FIXME: We should use instruction latency models to calculate the cost of
+  // each potential sequence, but this is very hard to do reliably because
+  // at least Intel's Core* chips have variable timing based on the number of
+  // significant digits in the divisor.
+  if (!Subtarget->useReciprocalEst())
+    return SDValue();
+
   EVT VT = Op.getValueType();
-  const char *RecipOp;
-  
+
   // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
   // TODO: Add support for AVX512 (v16f32).
   // It is likely not profitable to do this for f64 because a double-precision
@@ -12937,20 +12947,12 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
   // 15 instructions: convert to single, rcpss, convert back to double, refine
   // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
   // along with FMA, this could be a throughput win.
-  if (VT == MVT::f32 && Subtarget->hasSSE1())
-    RecipOp = "divf";
-  else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
-           (VT == MVT::v8f32 && Subtarget->hasAVX()))
-    RecipOp = "vec-divf";
-  else
-    return SDValue();
-  
-  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
-  if (!Recips.isEnabled(RecipOp))
-    return SDValue();
-
-  RefinementSteps = Recips.getRefinementSteps(RecipOp);
-  return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+  if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+      (Subtarget->hasAVX() && VT == MVT::v8f32)) {
+    RefinementSteps = ReciprocalEstimateRefinementSteps;
+    return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+  }
+  return SDValue();
 }
 
 /// If we have at least two divisions that use the same divisor, convert to
author	Rafael Espindola <rafael.espindola@gmail.com>	2015-05-23 00:22:44 +0000
committer	Rafael Espindola <rafael.espindola@gmail.com>	2015-05-23 00:22:44 +0000
commit	445712264d494b1723074b9ba4be530ddc56ea1a (patch)
tree	574d55a9e254924f743fc40bdc3e01091c9ba488 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent	6bbe9743d1a90f1d2749654dc3bec4ebcbc9b498 (diff)
download	bcm5719-llvm-445712264d494b1723074b9ba4be530ddc56ea1a.tar.gz bcm5719-llvm-445712264d494b1723074b9ba4be530ddc56ea1a.zip