summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 10
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 10
-rw-r--r-- llvm/lib/Target/CMakeLists.txt | 1
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 65
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.h | 8
-rw-r--r-- llvm/lib/Target/TargetRecip.cpp | 210
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 71
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.h | 8
8 files changed, 320 insertions, 63 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index ac6c7e33fb3..03833609a77 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2859,9 +2859,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
- SelectionDAG &DAG, int Enabled,
- int &RefinementSteps,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
bool &UseOneConstNR) const {
+ SelectionDAG &DAG = DCI.DAG;
EVT VT = Operand.getValueType();
if (VT == MVT::f32) {
@@ -2876,8 +2877,9 @@ SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
}
SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
- SelectionDAG &DAG, int Enabled,
- int &RefinementSteps) const {
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const {
+ SelectionDAG &DAG = DCI.DAG;
EVT VT = Operand.getValueType();
if (VT == MVT::f32) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 4cc1a74d18b..fc042b28180 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -164,11 +164,13 @@ public:
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
return true;
}
- SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
- int &RefinementSteps,
+ SDValue getRsqrtEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
bool &UseOneConstNR) const override;
- SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
- int &RefinementSteps) const override;
+ SDValue getRecipEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const override;
virtual SDNode *PostISelFolding(MachineSDNode *N,
SelectionDAG &DAG) const = 0;
diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt
index 1805437b12f..e6d0199952f 100644
--- a/llvm/lib/Target/CMakeLists.txt
+++ b/llvm/lib/Target/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMTarget
TargetLoweringObjectFile.cpp
TargetMachine.cpp
TargetMachineC.cpp
+ TargetRecip.cpp
TargetSubtargetInfo.cpp
ADDITIONAL_HEADER_DIRS
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 55e299cdc9d..05e823d7f16 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -901,6 +901,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::FSQRT);
}
+ // For the estimates, convergence is quadratic, so we essentially double the
+ // number of digits correct after every iteration. For both FRE and FRSQRTE,
+ // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+ // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
+ unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
+ RefinementSteps64 = RefinementSteps + 1;
+
+ ReciprocalEstimates.set("sqrtf", true, RefinementSteps);
+ ReciprocalEstimates.set("vec-sqrtf", true, RefinementSteps);
+ ReciprocalEstimates.set("divf", true, RefinementSteps);
+ ReciprocalEstimates.set("vec-divf", true, RefinementSteps);
+
+ ReciprocalEstimates.set("sqrtd", true, RefinementSteps64);
+ ReciprocalEstimates.set("vec-sqrtd", true, RefinementSteps64);
+ ReciprocalEstimates.set("divd", true, RefinementSteps64);
+ ReciprocalEstimates.set("vec-divd", true, RefinementSteps64);
+
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -9622,19 +9639,22 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
-static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
- // For the estimates, convergence is quadratic, so we essentially double the
- // number of digits correct after every iteration. For both FRE and FRSQRTE,
- // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
- // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
- int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
+static std::string getRecipOp(const char *Base, EVT VT) {
+ std::string RecipOp(Base);
if (VT.getScalarType() == MVT::f64)
- RefinementSteps++;
- return RefinementSteps;
+ RecipOp += "d";
+ else
+ RecipOp += "f";
+
+ if (VT.isVector())
+ RecipOp = "vec-" + RecipOp;
+
+ return RecipOp;
}
-SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG,
- int Enabled, int &RefinementSteps,
+SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
bool &UseOneConstNR) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
@@ -9643,18 +9663,21 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG,
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- if (RefinementSteps == ReciprocalEstimate::Unspecified)
- RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
+ TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
+ std::string RecipOp = getRecipOp("sqrt", VT);
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
UseOneConstNR = true;
- return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
+ return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
-SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
- int Enabled,
- int &RefinementSteps) const {
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
(VT == MVT::f64 && Subtarget.hasFRE()) ||
@@ -9662,9 +9685,13 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- if (RefinementSteps == ReciprocalEstimate::Unspecified)
- RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
- return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
+ TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
+ std::string RecipOp = getRecipOp("div", VT);
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
+ return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 3d4ec27c5a6..bdd658a8413 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -967,11 +967,11 @@ namespace llvm {
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
- int &RefinementSteps,
+ SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
bool &UseOneConstNR) const override;
- SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
- int &RefinementSteps) const override;
+ SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const override;
unsigned combineRepeatedFPDivisors() const override;
CCAssignFn *useFastISelCCs(unsigned Flag) const;
diff --git a/llvm/lib/Target/TargetRecip.cpp b/llvm/lib/Target/TargetRecip.cpp
new file mode 100644
index 00000000000..938ed9f3240
--- /dev/null
+++ b/llvm/lib/Target/TargetRecip.cpp
@@ -0,0 +1,210 @@
+//===-------------------------- TargetRecip.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class is used to customize machine-specific reciprocal estimate code
+// generation in a target-independent way.
+// If a target does not support operations in this specification, then code
+// generation will default to using supported operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetRecip.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+// These are the names of the individual reciprocal operations. These are
+// the key strings for queries and command-line inputs.
+// In addition, the command-line interface recognizes the global parameters
+// "all", "none", and "default".
+// Every operation is listed in a float variant ('f' suffix) and a double
+// variant ('d' suffix); the individual-parameter parser in this file resolves
+// an unsuffixed name by appending those suffixes.
+static const char *const RecipOps[] = {
+ "divd",
+ "divf",
+ "vec-divd",
+ "vec-divf",
+ "sqrtd",
+ "sqrtf",
+ "vec-sqrtd",
+ "vec-sqrtf",
+};
+
+/// Seed the map with one default-constructed RecipParams per known operation
+/// name, so that every operation starts out disabled with zero refinement
+/// steps.
+TargetRecip::TargetRecip() {
+  for (const char *OpName : RecipOps)
+    RecipMap.insert(std::make_pair(OpName, RecipParams()));
+}
+
+/// Extract an optional ":N" refinement-step suffix from \p In.
+///
+/// \p Position is always overwritten with the index of the first ':' in
+/// \p In (StringRef::npos when there is none).
+/// \returns false, leaving \p Value untouched, when \p In contains no ':'.
+/// \returns true, with the digit stored in \p Value, when exactly one decimal
+/// digit follows the ':'. Any other suffix ends in report_fatal_error.
+static bool parseRefinementStep(StringRef In, size_t &Position,
+                                uint8_t &Value) {
+  const char RefStepToken = ':';
+  Position = In.find(RefStepToken);
+  if (Position == StringRef::npos)
+    return false;
+
+  // Whatever follows the token has to be a single character in '0'..'9'.
+  StringRef Digits = In.substr(Position + 1);
+  const bool IsSingleDigit =
+      Digits.size() == 1 && Digits[0] >= '0' && Digits[0] <= '9';
+  if (!IsSingleDigit)
+    report_fatal_error("Invalid refinement step for -recip.");
+
+  Value = Digits[0] - '0';
+  return true;
+}
+
+/// Handle one of the global settings "all", "none" or "default", each
+/// optionally followed by a ":N" refinement-step suffix.
+///
+/// "all" enables and "none" disables every entry in the map; "default" leaves
+/// the enable flags as they are. When a step count is given it is written to
+/// every entry, regardless of which of the three names was used.
+///
+/// \returns true if \p Arg was one of the global settings, false if it is
+/// something else (invalid, or an individual operation name).
+bool TargetRecip::parseGlobalParams(const std::string &Arg) {
+  StringRef Name = Arg;
+
+  // Peel off the optional refinement-step suffix. parseRefinementStep only
+  // returns true when a one-digit suffix was present, so the flag alone tells
+  // us whether StepCount holds a value.
+  size_t ColonPos;
+  uint8_t StepCount;
+  const bool StepsGiven = parseRefinementStep(Name, ColonPos, StepCount);
+  if (StepsGiven)
+    Name = Name.substr(0, ColonPos);
+
+  const bool IsAll = Name == "all";
+  const bool IsNone = Name == "none";
+  // Any other string is invalid or an individual setting.
+  if (!IsAll && !IsNone && Name != "default")
+    return false;
+
+  // "default" keeps whatever enable values are already in the map.
+  if (IsAll || IsNone)
+    for (auto &Entry : RecipMap)
+      Entry.second.Enabled = IsAll;
+
+  // A custom refinement count applies to all, none, and default alike.
+  if (StepsGiven)
+    for (auto &Entry : RecipMap)
+      Entry.second.RefinementSteps = StepCount;
+
+  return true;
+}
+
+/// Apply a list of per-operation settings to the map.
+///
+/// Each element of \p Args has the form [!]name[:N]:
+///  - a leading '!' disables the operation instead of enabling it;
+///  - "name" is either a full key (e.g. "divf") or an unsuffixed base name
+///    (e.g. "div"), in which case both the 'f' and the 'd' entries are set;
+///  - ":N" (a single digit) optionally overrides the refinement step count.
+///
+/// An empty or unrecognised argument ends in report_fatal_error. A name that
+/// appears more than once is not rejected; the later occurrence simply
+/// overwrites the earlier one.
+void TargetRecip::parseIndividualParams(const std::vector<std::string> &Args) {
+  static const char DisabledPrefix = '!';
+
+  for (const std::string &Arg : Args) {
+    StringRef Val = Arg;
+
+    // Reject an empty argument up front instead of indexing into it.
+    if (Val.empty())
+      report_fatal_error("Invalid option for -recip.");
+
+    const bool IsDisabled = Val[0] == DisabledPrefix;
+    // Ignore the disablement token for string matching.
+    if (IsDisabled)
+      Val = Val.substr(1);
+
+    // Split off the optional refinement-step suffix. RefSteps is only
+    // meaningful when HasRefSteps is true.
+    size_t RefPos;
+    uint8_t RefSteps;
+    const bool HasRefSteps = parseRefinementStep(Val, RefPos, RefSteps);
+    if (HasRefSteps)
+      Val = Val.substr(0, RefPos);
+
+    RecipIter Iter = RecipMap.find(Val);
+    if (Iter == RecipMap.end()) {
+      // Try again specifying float suffix.
+      Iter = RecipMap.find(Val.str() + 'f');
+      if (Iter == RecipMap.end()) {
+        // Entries are expected to come in float/double pairs, so a double
+        // entry without its float sibling indicates a broken table.
+        assert(RecipMap.find(Val.str() + 'd') == RecipMap.end() &&
+               "Float entry missing from map");
+        report_fatal_error("Invalid option for -recip.");
+      }
+    }
+
+    // Apply the setting to the matched entry.
+    Iter->second.Enabled = !IsDisabled;
+    if (HasRefSteps)
+      Iter->second.RefinementSteps = RefSteps;
+
+    // If the precision was not specified, the double entry is also set. Look
+    // it up with find() so that a missing entry is never created here under a
+    // key built from a temporary string.
+    if (Val.back() != 'f' && Val.back() != 'd') {
+      RecipIter DoubleIter = RecipMap.find(Val.str() + 'd');
+      if (DoubleIter != RecipMap.end()) {
+        DoubleIter->second.Enabled = !IsDisabled;
+        if (HasRefSteps)
+          DoubleIter->second.RefinementSteps = RefSteps;
+      }
+    }
+  }
+}
+
+/// Configure the map from a comma-separated option string.
+///
+/// A string consisting of exactly one token is first offered to
+/// parseGlobalParams ("all", "none" or "default"); if that does not accept
+/// it, or if there are several tokens, every token is handled as an
+/// individual operation setting.
+void TargetRecip::set(StringRef &RecipString) {
+  SmallVector<StringRef, 4> Tokens;
+  SplitString(RecipString, Tokens, ",");
+
+  // The parsers take owning strings, so copy each token out.
+  std::vector<std::string> Options;
+  for (StringRef Token : Tokens)
+    Options.push_back(Token.str());
+
+  // Check if "all", "default", or "none" was specified.
+  const bool IsSingleOption = Options.size() == 1;
+  if (IsSingleOption && parseGlobalParams(Options.front()))
+    return;
+
+  parseIndividualParams(Options);
+}
+
+/// Report whether the estimate for operation \p Key is enabled. \p Key must
+/// already be present in the map; this is checked by assertion only.
+bool TargetRecip::isEnabled(StringRef Key) const {
+  ConstRecipIter Entry = RecipMap.find(Key);
+  assert(Entry != RecipMap.end() && "Unknown name for reciprocal map");
+  const RecipParams &Params = Entry->second;
+  return Params.Enabled;
+}
+
+/// Return the refinement step count stored for operation \p Key. \p Key must
+/// already be present in the map; this is checked by assertion only.
+unsigned TargetRecip::getRefinementSteps(StringRef Key) const {
+  ConstRecipIter Entry = RecipMap.find(Key);
+  assert(Entry != RecipMap.end() && "Unknown name for reciprocal map");
+  const RecipParams &Params = Entry->second;
+  return Params.RefinementSteps;
+}
+
+/// Set the enable flag and refinement step count programmatically.
+///
+/// With \p Key equal to "all" both values are written to every entry in the
+/// map. Any other \p Key addresses a single entry through the map's
+/// subscript operator.
+void TargetRecip::set(StringRef Key, bool Enable, unsigned RefSteps) {
+  if (Key != "all") {
+    RecipParams &Params = RecipMap[Key];
+    Params.Enabled = Enable;
+    Params.RefinementSteps = RefSteps;
+    return;
+  }
+
+  for (auto &Entry : RecipMap) {
+    Entry.second.Enabled = Enable;
+    Entry.second.RefinementSteps = RefSteps;
+  }
+}
+
+/// Two TargetRecip objects are equal when they hold the same set of operation
+/// names and every name has the same enable flag and refinement step count in
+/// both.
+///
+/// The maps are not assumed to have identical keys: set(Key, ...) can add a
+/// name to one object only. A name missing from either side therefore makes
+/// the objects unequal instead of being looked up blindly.
+bool TargetRecip::operator==(const TargetRecip &Other) const {
+  // Differing sizes mean one side has a name the other lacks; checking this
+  // first also keeps the comparison symmetric.
+  if (RecipMap.size() != Other.RecipMap.size())
+    return false;
+
+  for (const auto &KV : RecipMap) {
+    ConstRecipIter OtherIter = Other.RecipMap.find(KV.first);
+    // Never dereference the end iterator when Other has no such entry.
+    if (OtherIter == Other.RecipMap.end())
+      return false;
+
+    const RecipParams &RP = KV.second;
+    const RecipParams &OtherRP = OtherIter->second;
+    if (RP.RefinementSteps != OtherRP.RefinementSteps)
+      return false;
+    if (RP.Enabled != OtherRP.Enabled)
+      return false;
+  }
+  return true;
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b1f26109154..c1b6a2204cd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53,6 +53,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRecip.h"
#include "X86IntrinsicsInfo.h"
#include <bitset>
#include <numeric>
@@ -84,6 +85,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// X86-SSE is even stranger. It uses -1 or 0 for vector masks.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ // By default (and when -ffast-math is on), enable estimate codegen with 1
+ // refinement step for floats (not doubles) except scalar division. Scalar
+ // division estimates are disabled because they break too much real-world
+ // code. These defaults are intended to match GCC behavior.
+ ReciprocalEstimates.set("sqrtf", true, 1);
+ ReciprocalEstimates.set("divf", false, 1);
+ ReciprocalEstimates.set("vec-sqrtf", true, 1);
+ ReciprocalEstimates.set("vec-divf", true, 1);
+
// For 64-bit, since we have so many registers, use the ILP scheduler.
// For 32-bit, use the register pressure specific scheduling.
// For Atom, always use ILP scheduling.
@@ -15241,10 +15251,11 @@ bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
- SelectionDAG &DAG, int Enabled,
- int &RefinementSteps,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
bool &UseOneConstNR) const {
EVT VT = Op.getValueType();
+ const char *RecipOp;
// SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
// TODO: Add support for AVX512 (v16f32).
@@ -15253,24 +15264,30 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
// instructions: convert to single, rsqrtss, convert back to double, refine
// (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
- (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
- (VT == MVT::v8f32 && Subtarget.hasAVX())) {
- if (RefinementSteps == ReciprocalEstimate::Unspecified)
- RefinementSteps = 1;
+ if (VT == MVT::f32 && Subtarget.hasSSE1())
+ RecipOp = "sqrtf";
+ else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget.hasAVX()))
+ RecipOp = "vec-sqrtf";
+ else
+ return SDValue();
- UseOneConstNR = false;
- return DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
- }
- return SDValue();
+ TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
+ UseOneConstNR = false;
+ return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
}
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
-SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
- int Enabled,
- int &RefinementSteps) const {
+SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const {
EVT VT = Op.getValueType();
+ const char *RecipOp;
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
// TODO: Add support for AVX512 (v16f32).
@@ -15279,22 +15296,20 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
// 15 instructions: convert to single, rcpss, convert back to double, refine
// (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
+ if (VT == MVT::f32 && Subtarget.hasSSE1())
+ RecipOp = "divf";
+ else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget.hasAVX()))
+ RecipOp = "vec-divf";
+ else
+ return SDValue();
- if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
- (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
- (VT == MVT::v8f32 && Subtarget.hasAVX())) {
- // Enable estimate codegen with 1 refinement step for vector division.
- // Scalar division estimates are disabled because they break too much
- // real-world code. These defaults are intended to match GCC behavior.
- if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified)
- return SDValue();
-
- if (RefinementSteps == ReciprocalEstimate::Unspecified)
- RefinementSteps = 1;
+ TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
- return DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
- }
- return SDValue();
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
+ return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
}
/// If we have at least two divisions that use the same divisor, convert to
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 83c0d010c24..09d6ee47393 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1266,13 +1266,13 @@ namespace llvm {
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
/// Use rsqrt* to speed up sqrt calculations.
- SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
- int &RefinementSteps,
+ SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
bool &UseOneConstNR) const override;
/// Use rcp* to speed up fdiv calculations.
- SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
- int &RefinementSteps) const override;
+ SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const override;
/// Reassociate floating point divisions into multiply by reciprocal.
unsigned combineRepeatedFPDivisors() const override;
OpenPOWER on IntegriCloud