diff options
| author | Justin Lebar <jlebar@google.com> | 2017-01-31 23:08:57 +0000 |
|---|---|---|
| committer | Justin Lebar <jlebar@google.com> | 2017-01-31 23:08:57 +0000 |
| commit | 06fcea4cd92ecedb8ddcebbe80650d2e92baf2db (patch) | |
| tree | fa8e53f46d0dd2880f4a78ab50464f673d197007 /llvm/lib/Target/NVPTX | |
| parent | d9953d9dd289b87c17ed6b60d14f2db62b1f9dc4 (diff) | |
| download | bcm5719-llvm-06fcea4cd92ecedb8ddcebbe80650d2e92baf2db.tar.gz bcm5719-llvm-06fcea4cd92ecedb8ddcebbe80650d2e92baf2db.zip | |
[NVPTX] Compute approx sqrt as 1/rsqrt(x) rather than x*rsqrt(x).
x*rsqrt(x) returns NaN for x == 0 (rsqrt(0) is +inf, and 0 * inf is NaN),
whereas 1/rsqrt(x) returns 0 (the reciprocal of +inf), as desired.
Verified that the particular nvptx approximate instructions here do in
fact return 0 for x = 0.
llvm-svn: 293713
Diffstat (limited to 'llvm/lib/Target/NVPTX')
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 11 |
1 files changed, 8 insertions, 3 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 194e46b0448..9584776e185 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1080,9 +1080,14 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f : Intrinsic::nvvm_sqrt_approx_f); else { - // There's no sqrt.approx.f64 instruction, so we emit x * rsqrt(x). - return DAG.getNode(ISD::FMUL, DL, VT, Operand, - MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d)); + // There's no sqrt.approx.f64 instruction, so we emit + // reciprocal(rsqrt(x)). This is faster than + // select(x == 0, 0, x * rsqrt(x)). (In fact, it's faster than plain + // x * rsqrt(x).) + return DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32), + MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d)); } } } |

