summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2015-01-13 20:53:23 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2015-01-13 20:53:23 +0000
commitbf0db918b23cb28b8ed35fe0371d0d9be6fd27f4 (patch)
tree0c652c950ff493f9058e8e8998486df42f556b5e /llvm
parente93d06a5793514da2c6b16165e67361443094a4e (diff)
downloadbcm5719-llvm-bf0db918b23cb28b8ed35fe0371d0d9be6fd27f4.tar.gz
bcm5719-llvm-bf0db918b23cb28b8ed35fe0371d0d9be6fd27f4.zip
R600: Implement getRecipEstimate
This requires a new hook to prevent expanding sqrt in terms of rsqrt and reciprocal. v_rcp_f32, v_rsq_f32, and v_sqrt_f32 are all the same rate, so this expansion would just double the number of instructions and cycles. llvm-svn: 225828
Diffstat (limited to 'llvm')
-rw-r--r--llvm/include/llvm/Target/TargetLowering.h14
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp3
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp1
-rw-r--r--llvm/lib/Target/R600/AMDGPUISelLowering.cpp23
-rw-r--r--llvm/lib/Target/R600/AMDGPUISelLowering.h3
5 files changed, 42 insertions, 2 deletions
diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h
index b18c15ba144..a6e46cecc38 100644
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -217,6 +217,11 @@ public:
/// several shifts, adds, and multiplies for this target.
bool isIntDivCheap() const { return IntDivIsCheap; }
+ /// Return true if sqrt(x) is as cheap or cheaper than 1 / rsqrt(x)
+ bool isFsqrtCheap() const {
+ return FsqrtIsCheap;
+ }
+
/// Returns true if target has indicated at least one type should be bypassed.
bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
@@ -1183,7 +1188,11 @@ protected:
/// possible, should be replaced by an alternate sequence of instructions not
/// containing an integer divide.
void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; }
-
+
+ /// Tells the code generator that fsqrt is cheap, and should not be replaced
+ /// with an alternative sequence of instructions.
+ void setFsqrtIsCheap(bool isCheap = true) { FsqrtIsCheap = isCheap; }
+
/// Tells the code generator that this target supports floating point
/// exceptions and cares about preserving floating point exception behavior.
void setHasFloatingPointExceptions(bool FPExceptions = true) {
@@ -1625,6 +1634,9 @@ private:
/// unconditionally.
bool IntDivIsCheap;
+ // Don't expand fsqrt with an approximation based on the inverse sqrt.
+ bool FsqrtIsCheap;
+
/// Tells the code generator to bypass slow divide or remainder
/// instructions. For example, BypassSlowDivWidths[32,8] tells the code
/// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 171ee1e3488..e57c5a2b1ac 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7538,7 +7538,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
- if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ !TLI.isFsqrtCheap()) {
// Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
EVT VT = RV.getValueType();
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index fae54d2c304..9b2fdffa87c 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -710,6 +710,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
HasMultipleConditionRegisters = false;
HasExtractBitsInsn = false;
IntDivIsCheap = false;
+ FsqrtIsCheap = false;
Pow2SDivIsCheap = false;
JumpIsExpensive = false;
PredictableSelectIsExpensive = false;
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
index 55a35f77f6a..1ad2a693da1 100644
--- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -403,6 +403,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
// large sequence of instructions.
setIntDivIsCheap(false);
setPow2SDivIsCheap(false);
+ setFsqrtIsCheap(true);
// FIXME: Need to really handle these.
MaxStoresPerMemcpy = 4096;
@@ -2585,6 +2586,28 @@ SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
return SDValue();
}
+SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = Operand.getValueType();
+
+ if (VT == MVT::f32) {
+ // Reciprocal, < 1 ulp error.
+ //
+ // This reciprocal approximation converges to < 0.5 ulp error with one
+ // newton rhapson performed with two fused multiple adds (FMAs).
+
+ RefinementSteps = 0;
+ return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
+ }
+
+ // TODO: There is also f64 rcp instruction, but the documentation is less
+ // clear on its precision.
+
+ return SDValue();
+}
+
static void computeKnownBitsForMinMax(const SDValue Op0,
const SDValue Op1,
APInt &KnownZero,
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.h b/llvm/lib/Target/R600/AMDGPUISelLowering.h
index 21c8e988179..15f529c40a3 100644
--- a/llvm/lib/Target/R600/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/R600/AMDGPUISelLowering.h
@@ -171,6 +171,9 @@ public:
DAGCombinerInfo &DCI,
unsigned &RefinementSteps,
bool &UseOneConstNR) const override;
+ SDValue getRecipEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const override;
virtual SDNode *PostISelFolding(MachineSDNode *N,
SelectionDAG &DAG) const {
OpenPOWER on IntegriCloud