summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
author: Justin Lebar <jlebar@google.com> 2017-01-31 23:08:57 +0000
committer: Justin Lebar <jlebar@google.com> 2017-01-31 23:08:57 +0000
commit: 06fcea4cd92ecedb8ddcebbe80650d2e92baf2db (patch)
tree: fa8e53f46d0dd2880f4a78ab50464f673d197007 /llvm/test/CodeGen
parent: d9953d9dd289b87c17ed6b60d14f2db62b1f9dc4 (diff)
download: bcm5719-llvm-06fcea4cd92ecedb8ddcebbe80650d2e92baf2db.tar.gz
          bcm5719-llvm-06fcea4cd92ecedb8ddcebbe80650d2e92baf2db.zip
[NVPTX] Compute approx sqrt as 1/rsqrt(x) rather than x*rsqrt(x).
x*rsqrt(x) returns NaN for x == 0, whereas 1/rsqrt(x) returns 0, as desired.

Verified that the particular nvptx approximate instructions here do in fact return 0 for x = 0.

llvm-svn: 293713
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- llvm/test/CodeGen/NVPTX/fast-math.ll | 4
-rw-r--r-- llvm/test/CodeGen/NVPTX/sqrt-approx.ll | 8
2 files changed, 7 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll
index 528d2c02df5..f925d67434c 100644
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -40,11 +40,11 @@ define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
}
; There are no fast-math or ftz versions of sqrt and div for f64. We use
-; x * rsqrt(x) for sqrt(x), and emit a vanilla divide.
+; reciprocal(rsqrt(x)) for sqrt(x), and emit a vanilla divide.
; CHECK-LABEL: sqrt_div_fast_ftz_f64(
; CHECK: rsqrt.approx.f64
-; CHECK: mul.f64
+; CHECK: rcp.approx.ftz.f64
; CHECK: div.rn.f64
define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
%t1 = tail call double @llvm.sqrt.f64(double %a)
diff --git a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
index 5edf9e28a93..1e28db44b80 100644
--- a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
+++ b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
@@ -59,9 +59,11 @@ define float @test_sqrt_ftz(float %a) #0 #1 {
; CHECK-LABEL test_sqrt64
define double @test_sqrt64(double %a) #0 {
-; There's no sqrt.approx.f64 instruction; we emit x * rsqrt.approx.f64(x).
+; There's no sqrt.approx.f64 instruction; we emit
+; reciprocal(rsqrt.approx.f64(x)). There's no non-ftz approximate reciprocal,
+; so we just use the ftz version.
; CHECK: rsqrt.approx.f64
-; CHECK: mul.f64
+; CHECK: rcp.approx.ftz.f64
%ret = tail call double @llvm.sqrt.f64(double %a)
ret double %ret
}
@@ -70,7 +72,7 @@ define double @test_sqrt64(double %a) #0 {
define double @test_sqrt64_ftz(double %a) #0 #1 {
; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
; CHECK: rsqrt.approx.f64
-; CHECK: mul.f64
+; CHECK: rcp.approx.ftz.f64
%ret = tail call double @llvm.sqrt.f64(double %a)
ret double %ret
}
OpenPOWER on IntegriCloud