diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2016-10-20 16:55:45 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2016-10-20 16:55:45 +0000 |
| commit | 0051efcf977134026a8e2a2239e2c28815d07e40 (patch) | |
| tree | b5a13a1aebc2baa9f30f363d23e2385e78982d4a /llvm/lib/Target | |
| parent | d65ec2ceb2bac8c5328046f5e3f093ab01acc4b9 (diff) | |
| download | bcm5719-llvm-0051efcf977134026a8e2a2239e2c28815d07e40.tar.gz bcm5719-llvm-0051efcf977134026a8e2a2239e2c28815d07e40.zip | |
[Target] remove TargetRecip class; 2nd try
This is a retry of r284495, which was reverted at r284513 due to use-after-scope bugs
caused by faulty usage of StringRef.
This version also renames a pair of functions:
getRecipEstimateDivEnabled()
getRecipEstimateSqrtEnabled()
as suggested by Eric Christopher.
original commit msg:
[Target] remove TargetRecip class; move reciprocal estimate isel functionality to TargetLowering
This is a follow-up to https://reviews.llvm.org/D24816, where we changed reciprocal estimates to be function attributes
rather than TargetOptions.
This patch is intended to be a structural, but not functional change. By moving all of the
TargetRecip functionality into TargetLowering, we can remove all of the reciprocal estimate
state, shield the callers from the string format implementation, and simplify/localize the
logic needed for a target to enable this.
If a function has a "reciprocal-estimates" attribute, those settings may override the target's
default reciprocal preferences for whatever operation and data type we're trying to optimize.
If there's no attribute string or specific setting for the op/type pair, just use the target
default settings.
As noted earlier, a better solution would be to move the reciprocal estimate settings to IR
instructions and SDNodes rather than function attributes, but that's a multi-step job that
requires infrastructure improvements. I intend to work on that, but it's not clear how long
it will take to get all the pieces in place.
Differential Revision: https://reviews.llvm.org/D25440
llvm-svn: 284746
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 65 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/TargetRecip.cpp | 210 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 71 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 8 |
8 files changed, 63 insertions, 320 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 03833609a77..ac6c7e33fb3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2859,10 +2859,9 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { } SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps, + SelectionDAG &DAG, int Enabled, + int &RefinementSteps, bool &UseOneConstNR) const { - SelectionDAG &DAG = DCI.DAG; EVT VT = Operand.getValueType(); if (VT == MVT::f32) { @@ -2877,9 +2876,8 @@ SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand, } SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const { - SelectionDAG &DAG = DCI.DAG; + SelectionDAG &DAG, int Enabled, + int &RefinementSteps) const { EVT VT = Operand.getValueType(); if (VT == MVT::f32) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index fc042b28180..4cc1a74d18b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -164,13 +164,11 @@ public: bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override { return true; } - SDValue getRsqrtEstimate(SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps, + SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps, bool &UseOneConstNR) const override; - SDValue getRecipEstimate(SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const override; + SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps) const override; virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const = 0; diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt index 
e6d0199952f..1805437b12f 100644 --- a/llvm/lib/Target/CMakeLists.txt +++ b/llvm/lib/Target/CMakeLists.txt @@ -6,7 +6,6 @@ add_llvm_library(LLVMTarget TargetLoweringObjectFile.cpp TargetMachine.cpp TargetMachineC.cpp - TargetRecip.cpp TargetSubtargetInfo.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 05e823d7f16..55e299cdc9d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -901,23 +901,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::FSQRT); } - // For the estimates, convergence is quadratic, so we essentially double the - // number of digits correct after every iteration. For both FRE and FRSQRTE, - // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), - // this is 2^-14. IEEE float has 23 digits and double has 52 digits. - unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3, - RefinementSteps64 = RefinementSteps + 1; - - ReciprocalEstimates.set("sqrtf", true, RefinementSteps); - ReciprocalEstimates.set("vec-sqrtf", true, RefinementSteps); - ReciprocalEstimates.set("divf", true, RefinementSteps); - ReciprocalEstimates.set("vec-divf", true, RefinementSteps); - - ReciprocalEstimates.set("sqrtd", true, RefinementSteps64); - ReciprocalEstimates.set("vec-sqrtd", true, RefinementSteps64); - ReciprocalEstimates.set("divd", true, RefinementSteps64); - ReciprocalEstimates.set("vec-divd", true, RefinementSteps64); - // Darwin long double math library functions have $LDBL128 appended. 
if (Subtarget.isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); @@ -9639,22 +9622,19 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // Target Optimization Hooks //===----------------------------------------------------------------------===// -static std::string getRecipOp(const char *Base, EVT VT) { - std::string RecipOp(Base); +static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) { + // For the estimates, convergence is quadratic, so we essentially double the + // number of digits correct after every iteration. For both FRE and FRSQRTE, + // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), + // this is 2^-14. IEEE float has 23 digits and double has 52 digits. + int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; if (VT.getScalarType() == MVT::f64) - RecipOp += "d"; - else - RecipOp += "f"; - - if (VT.isVector()) - RecipOp = "vec-" + RecipOp; - - return RecipOp; + RefinementSteps++; + return RefinementSteps; } -SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps, +SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, + int Enabled, int &RefinementSteps, bool &UseOneConstNR) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || @@ -9663,21 +9643,18 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { - TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); - std::string RecipOp = getRecipOp("sqrt", VT); - if (!Recips.isEnabled(RecipOp)) - return SDValue(); + if (RefinementSteps == ReciprocalEstimate::Unspecified) + RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); - RefinementSteps = Recips.getRefinementSteps(RecipOp); UseOneConstNR = true; - return DCI.DAG.getNode(PPCISD::FRSQRTE, 
SDLoc(Operand), VT, Operand); + return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } return SDValue(); } -SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const { +SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, + int Enabled, + int &RefinementSteps) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRES()) || (VT == MVT::f64 && Subtarget.hasFRE()) || @@ -9685,13 +9662,9 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { - TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); - std::string RecipOp = getRecipOp("div", VT); - if (!Recips.isEnabled(RecipOp)) - return SDValue(); - - RefinementSteps = Recips.getRefinementSteps(RecipOp); - return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); + if (RefinementSteps == ReciprocalEstimate::Unspecified) + RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); + return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } return SDValue(); } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index bdd658a8413..3d4ec27c5a6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -967,11 +967,11 @@ namespace llvm { SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, - unsigned &RefinementSteps, + SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps, bool &UseOneConstNR) const override; - SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const override; + SDValue 
getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps) const override; unsigned combineRepeatedFPDivisors() const override; CCAssignFn *useFastISelCCs(unsigned Flag) const; diff --git a/llvm/lib/Target/TargetRecip.cpp b/llvm/lib/Target/TargetRecip.cpp deleted file mode 100644 index 938ed9f3240..00000000000 --- a/llvm/lib/Target/TargetRecip.cpp +++ /dev/null @@ -1,210 +0,0 @@ -//===-------------------------- TargetRecip.cpp ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class is used to customize machine-specific reciprocal estimate code -// generation in a target-independent way. -// If a target does not support operations in this specification, then code -// generation will default to using supported operations. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Target/TargetRecip.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -// These are the names of the individual reciprocal operations. These are -// the key strings for queries and command-line inputs. -// In addition, the command-line interface recognizes the global parameters -// "all", "none", and "default". -static const char *const RecipOps[] = { - "divd", - "divf", - "vec-divd", - "vec-divf", - "sqrtd", - "sqrtf", - "vec-sqrtd", - "vec-sqrtf", -}; - -/// All operations are disabled by default and refinement steps are set to zero. 
-TargetRecip::TargetRecip() { - unsigned NumStrings = llvm::array_lengthof(RecipOps); - for (unsigned i = 0; i < NumStrings; ++i) - RecipMap.insert(std::make_pair(RecipOps[i], RecipParams())); -} - -static bool parseRefinementStep(StringRef In, size_t &Position, - uint8_t &Value) { - const char RefStepToken = ':'; - Position = In.find(RefStepToken); - if (Position == StringRef::npos) - return false; - - StringRef RefStepString = In.substr(Position + 1); - // Allow exactly one numeric character for the additional refinement - // step parameter. - if (RefStepString.size() == 1) { - char RefStepChar = RefStepString[0]; - if (RefStepChar >= '0' && RefStepChar <= '9') { - Value = RefStepChar - '0'; - return true; - } - } - report_fatal_error("Invalid refinement step for -recip."); -} - -bool TargetRecip::parseGlobalParams(const std::string &Arg) { - StringRef ArgSub = Arg; - - // Look for an optional setting of the number of refinement steps needed - // for this type of reciprocal operation. - size_t RefPos; - uint8_t RefSteps; - StringRef RefStepString; - if (parseRefinementStep(ArgSub, RefPos, RefSteps)) { - // Split the string for further processing. - RefStepString = ArgSub.substr(RefPos + 1); - ArgSub = ArgSub.substr(0, RefPos); - } - bool Enable; - bool UseDefaults; - if (ArgSub == "all") { - UseDefaults = false; - Enable = true; - } else if (ArgSub == "none") { - UseDefaults = false; - Enable = false; - } else if (ArgSub == "default") { - UseDefaults = true; - } else { - // Any other string is invalid or an individual setting. - return false; - } - - // All enable values will be initialized to target defaults if 'default' was - // specified. - if (!UseDefaults) - for (auto &KV : RecipMap) - KV.second.Enabled = Enable; - - // Custom refinement count was specified with all, none, or default. 
- if (!RefStepString.empty()) - for (auto &KV : RecipMap) - KV.second.RefinementSteps = RefSteps; - - return true; -} - -void TargetRecip::parseIndividualParams(const std::vector<std::string> &Args) { - static const char DisabledPrefix = '!'; - unsigned NumArgs = Args.size(); - - for (unsigned i = 0; i != NumArgs; ++i) { - StringRef Val = Args[i]; - - bool IsDisabled = Val[0] == DisabledPrefix; - // Ignore the disablement token for string matching. - if (IsDisabled) - Val = Val.substr(1); - - size_t RefPos; - uint8_t RefSteps; - StringRef RefStepString; - if (parseRefinementStep(Val, RefPos, RefSteps)) { - // Split the string for further processing. - RefStepString = Val.substr(RefPos + 1); - Val = Val.substr(0, RefPos); - } - - RecipIter Iter = RecipMap.find(Val); - if (Iter == RecipMap.end()) { - // Try again specifying float suffix. - Iter = RecipMap.find(Val.str() + 'f'); - if (Iter == RecipMap.end()) { - Iter = RecipMap.find(Val.str() + 'd'); - assert(Iter == RecipMap.end() && "Float entry missing from map"); - report_fatal_error("Invalid option for -recip."); - } - } - - // Mark the matched option as found. Do not allow duplicate specifiers. - Iter->second.Enabled = !IsDisabled; - if (!RefStepString.empty()) - Iter->second.RefinementSteps = RefSteps; - - // If the precision was not specified, the double entry is also initialized. - if (Val.back() != 'f' && Val.back() != 'd') { - RecipParams &Params = RecipMap[Val.str() + 'd']; - Params.Enabled = !IsDisabled; - if (!RefStepString.empty()) - Params.RefinementSteps = RefSteps; - } - } -} - -void TargetRecip::set(StringRef &RecipString) { - SmallVector<StringRef, 4> RecipStringVector; - SplitString(RecipString, RecipStringVector, ","); - std::vector<std::string> RecipVector; - for (unsigned i = 0; i < RecipStringVector.size(); ++i) - RecipVector.push_back(RecipStringVector[i].str()); - - unsigned NumArgs = RecipVector.size(); - - // Check if "all", "default", or "none" was specified. 
- if (NumArgs == 1 && parseGlobalParams(RecipVector[0])) - return; - - parseIndividualParams(RecipVector); -} - -bool TargetRecip::isEnabled(StringRef Key) const { - ConstRecipIter Iter = RecipMap.find(Key); - assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); - return Iter->second.Enabled; -} - -unsigned TargetRecip::getRefinementSteps(StringRef Key) const { - ConstRecipIter Iter = RecipMap.find(Key); - assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); - return Iter->second.RefinementSteps; -} - -void TargetRecip::set(StringRef Key, bool Enable, unsigned RefSteps) { - if (Key == "all") { - for (auto &KV : RecipMap) { - RecipParams &RP = KV.second; - RP.Enabled = Enable; - RP.RefinementSteps = RefSteps; - } - } else { - RecipParams &RP = RecipMap[Key]; - RP.Enabled = Enable; - RP.RefinementSteps = RefSteps; - } -} - -bool TargetRecip::operator==(const TargetRecip &Other) const { - for (const auto &KV : RecipMap) { - StringRef Op = KV.first; - const RecipParams &RP = KV.second; - const RecipParams &OtherRP = Other.RecipMap.find(Op)->second; - if (RP.RefinementSteps != OtherRP.RefinementSteps) - return false; - if (RP.Enabled != OtherRP.Enabled) - return false; - } - return true; -} diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index babc0f475c9..8fbdbdd3123 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53,7 +53,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRecip.h" #include "X86IntrinsicsInfo.h" #include <bitset> #include <numeric> @@ -85,15 +84,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // X86-SSE is even stranger. It uses -1 or 0 for vector masks. 
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - // By default (and when -ffast-math is on), enable estimate codegen with 1 - // refinement step for floats (not doubles) except scalar division. Scalar - // division estimates are disabled because they break too much real-world - // code. These defaults are intended to match GCC behavior. - ReciprocalEstimates.set("sqrtf", true, 1); - ReciprocalEstimates.set("divf", false, 1); - ReciprocalEstimates.set("vec-sqrtf", true, 1); - ReciprocalEstimates.set("vec-divf", true, 1); - // For 64-bit, since we have so many registers, use the ILP scheduler. // For 32-bit, use the register pressure specific scheduling. // For Atom, always use ILP scheduling. @@ -15239,11 +15229,10 @@ bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const { /// The minimum architected relative accuracy is 2^-12. We need one /// Newton-Raphson step to have a good float result (24 bits of precision). SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps, + SelectionDAG &DAG, int Enabled, + int &RefinementSteps, bool &UseOneConstNR) const { EVT VT = Op.getValueType(); - const char *RecipOp; // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps. // TODO: Add support for AVX512 (v16f32). @@ -15252,30 +15241,24 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op, // instructions: convert to single, rsqrtss, convert back to double, refine // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA // along with FMA, this could be a throughput win. 
- if (VT == MVT::f32 && Subtarget.hasSSE1()) - RecipOp = "sqrtf"; - else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) || - (VT == MVT::v8f32 && Subtarget.hasAVX())) - RecipOp = "vec-sqrtf"; - else - return SDValue(); + if ((VT == MVT::f32 && Subtarget.hasSSE1()) || + (VT == MVT::v4f32 && Subtarget.hasSSE1()) || + (VT == MVT::v8f32 && Subtarget.hasAVX())) { + if (RefinementSteps == ReciprocalEstimate::Unspecified) + RefinementSteps = 1; - TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); - if (!Recips.isEnabled(RecipOp)) - return SDValue(); - - RefinementSteps = Recips.getRefinementSteps(RecipOp); - UseOneConstNR = false; - return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op); + UseOneConstNR = false; + return DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op); + } + return SDValue(); } /// The minimum architected relative accuracy is 2^-12. We need one /// Newton-Raphson step to have a good float result (24 bits of precision). -SDValue X86TargetLowering::getRecipEstimate(SDValue Op, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const { +SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG, + int Enabled, + int &RefinementSteps) const { EVT VT = Op.getValueType(); - const char *RecipOp; // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps. // TODO: Add support for AVX512 (v16f32). @@ -15284,20 +15267,22 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, // 15 instructions: convert to single, rcpss, convert back to double, refine // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA // along with FMA, this could be a throughput win. 
- if (VT == MVT::f32 && Subtarget.hasSSE1()) - RecipOp = "divf"; - else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) || - (VT == MVT::v8f32 && Subtarget.hasAVX())) - RecipOp = "vec-divf"; - else - return SDValue(); - TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); - if (!Recips.isEnabled(RecipOp)) - return SDValue(); + if ((VT == MVT::f32 && Subtarget.hasSSE1()) || + (VT == MVT::v4f32 && Subtarget.hasSSE1()) || + (VT == MVT::v8f32 && Subtarget.hasAVX())) { + // Enable estimate codegen with 1 refinement step for vector division. + // Scalar division estimates are disabled because they break too much + // real-world code. These defaults are intended to match GCC behavior. + if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified) + return SDValue(); - RefinementSteps = Recips.getRefinementSteps(RecipOp); - return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op); + if (RefinementSteps == ReciprocalEstimate::Unspecified) + RefinementSteps = 1; + + return DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op); + } + return SDValue(); } /// If we have at least two divisions that use the same divisor, convert to diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index bc7909c654d..62b77ab6340 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1268,13 +1268,13 @@ namespace llvm { bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override; /// Use rsqrt* to speed up sqrt calculations. - SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, - unsigned &RefinementSteps, + SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps, bool &UseOneConstNR) const override; /// Use rcp* to speed up fdiv calculations. 
- SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const override; + SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps) const override; /// Reassociate floating point divisions into multiply by reciprocal. unsigned combineRepeatedFPDivisors() const override; |

