summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp11
1 files changed, 8 insertions, 3 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 194e46b0448..9584776e185 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1080,9 +1080,14 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f
: Intrinsic::nvvm_sqrt_approx_f);
else {
- // There's no sqrt.approx.f64 instruction, so we emit x * rsqrt(x).
- return DAG.getNode(ISD::FMUL, DL, VT, Operand,
- MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
+ // There's no sqrt.approx.f64 instruction, so we emit
+ // reciprocal(rsqrt(x)). This is faster than
+ // select(x == 0, 0, x * rsqrt(x)). (In fact, it's faster than plain
+ // x * rsqrt(x).)
+ return DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32),
+ MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
}
}
}
OpenPOWER on IntegriCloud