diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2016-10-04 20:46:43 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2016-10-04 20:46:43 +0000 |
| commit | bfdbea6481a2247fcc63c27e3acd393000fd8d41 (patch) | |
| tree | e7dfce8045101809ecdb13200171bdfab26beda5 /llvm/test/CodeGen | |
| parent | 0bb72c14240496d74f9263fad6aba830625499bd (diff) | |
| download | bcm5719-llvm-bfdbea6481a2247fcc63c27e3acd393000fd8d41.tar.gz bcm5719-llvm-bfdbea6481a2247fcc63c27e3acd393000fd8d41.zip | |
[Target] move reciprocal estimate settings from TargetOptions to TargetLowering
The motivation for the change is that we can't have pseudo-global settings for
codegen living in TargetOptions because that doesn't work with LTO.
Ideally, these reciprocal attributes will be moved to the instruction level via
FMF, metadata, or something else. But making them function attributes is at least
an improvement over the current state.
The ingredients of this patch are:
Remove the reciprocal estimate command-line debug option.
Add TargetRecip to TargetLowering.
Remove TargetRecip from TargetOptions.
Clean up the TargetRecip implementation to work with this new scheme.
Set the default reciprocal settings in TargetLoweringBase (everything is off).
Update the PowerPC defaults, users, and tests.
Update the x86 defaults, users, and tests.
Note that if this patch needs to be reverted, the related clang patch checked in
at r283251 should be reverted too.
Differential Revision: https://reviews.llvm.org/D24816
llvm-svn: 283252
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/recipest.ll | 46 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/recip-fastmath.ll | 205 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sqrt-fastmath.ll | 246 |
4 files changed, 297 insertions, 204 deletions
diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll index c49660fdc3a..3a8e2ff7d61 100644 --- a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -1,6 +1,6 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx -recip=sqrtf:0,sqrtd:0 | FileCheck %s -check-prefix=CHECK-NONR ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck -check-prefix=CHECK-SAFE %s + target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -25,19 +25,26 @@ define double @foo(double %a, double %b) nounwind { ; CHECK-NEXT: fmul ; CHECK: blr -; CHECK-NONR: @foo -; CHECK-NONR: frsqrte -; CHECK-NONR-NOT: fmadd -; CHECK-NONR: fmul -; CHECK-NONR-NOT: fmadd -; CHECK-NONR: blr - ; CHECK-SAFE: @foo ; CHECK-SAFE: fsqrt ; CHECK-SAFE: fdiv ; CHECK-SAFE: blr } +define double @no_estimate_refinement_f64(double %a, double %b) #0 { + %x = call double @llvm.sqrt.f64(double %b) + %r = fdiv double %a, %x + ret double %r + +; CHECK-LABEL: @no_estimate_refinement_f64 +; CHECK: frsqrte +; CHECK-NOT: fmadd +; CHECK: fmul +; CHECK-NOT: fmadd +; CHECK: blr +} + + define double @foof(double %a, float %b) nounwind { %x = call float @llvm.sqrt.f32(float %b) %y = fpext float %x to double @@ -98,19 +105,26 @@ define float @goo(float %a, float %b) nounwind { ; CHECK-NEXT: fmuls ; CHECK-NEXT: blr -; CHECK-NONR: @goo -; CHECK-NONR: frsqrtes -; CHECK-NONR-NOT: fmadds -; CHECK-NONR: fmuls -; CHECK-NONR-NOT: fmadds -; CHECK-NONR: blr - ; CHECK-SAFE: @goo ; CHECK-SAFE: fsqrts ; CHECK-SAFE: fdivs ; CHECK-SAFE: blr } + +define float @no_estimate_refinement_f32(float %a, float %b) 
#0 { + %x = call float @llvm.sqrt.f32(float %b) + %r = fdiv float %a, %x + ret float %r + +; CHECK-LABEL: @no_estimate_refinement_f32 +; CHECK: frsqrtes +; CHECK-NOT: fmadds +; CHECK: fmuls +; CHECK-NOT: fmadds +; CHECK: blr +} + ; Recognize that this is rsqrt(a) * rcp(b) * c, ; not 1 / ( 1 / sqrt(a)) * rcp(b) * c. define float @rsqrt_fmul(float %a, float %b, float %c) { @@ -252,3 +266,5 @@ define <4 x float> @hoo3(<4 x float> %a) nounwind { ; CHECK-SAFE: blr } +attributes #0 = { nounwind "reciprocal-estimates"="sqrtf:0,sqrtd:0" } + diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll index 8e02dad9d5a..bd622d0442e 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath.ll @@ -1,6 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX ; If the target's divss/divps instructions are substantially ; slower than rcpss/rcpps with a Newton-Raphson refinement, @@ -10,100 +9,142 @@ ; for details about the accuracy, speed, and implementation ; differences of x86 reciprocal estimates. 
-define float @reciprocal_estimate(float %x) #0 { +define float @f32_no_estimate(float %x) #0 { +; AVX-LABEL: f32_no_estimate: +; AVX: # BB#0: +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; %div = fdiv fast float 1.0, %x ret float %div +} -; NORECIP-LABEL: reciprocal_estimate: -; NORECIP: movss -; NORECIP-NEXT: divss -; NORECIP-NEXT: movaps -; NORECIP-NEXT: retq - -; RECIP-LABEL: reciprocal_estimate: -; RECIP: vrcpss -; RECIP: vmulss -; RECIP: vsubss -; RECIP: vmulss -; RECIP: vaddss -; RECIP-NEXT: retq +define float @f32_one_step(float %x) #1 { +; AVX-LABEL: f32_one_step: +; AVX: # BB#0: +; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; + %div = fdiv fast float 1.0, %x + ret float %div +} -; REFINE-LABEL: reciprocal_estimate: -; REFINE: vrcpss -; REFINE: vmulss -; REFINE: vsubss -; REFINE: vmulss -; REFINE: vaddss -; REFINE: vmulss -; REFINE: vsubss -; REFINE: vmulss -; REFINE: vaddss -; REFINE-NEXT: retq +define float @f32_two_step(float %x) #2 { +; AVX-LABEL: f32_two_step: +; AVX: # BB#0: +; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm2 +; AVX-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm2, %xmm3, %xmm2 +; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm2 +; AVX-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vsubss %xmm0, %xmm3, %xmm0 +; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; + %div = fdiv fast float 1.0, %x + ret float %div } -define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 { +define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 { +; AVX-LABEL: v4f32_no_estimate: +; AVX: # BB#0: +; AVX-NEXT: vmovaps 
{{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x ret <4 x float> %div +} -; NORECIP-LABEL: reciprocal_estimate_v4f32: -; NORECIP: movaps -; NORECIP-NEXT: divps -; NORECIP-NEXT: movaps -; NORECIP-NEXT: retq - -; RECIP-LABEL: reciprocal_estimate_v4f32: -; RECIP: vrcpps -; RECIP: vmulps -; RECIP: vsubps -; RECIP: vmulps -; RECIP: vaddps -; RECIP-NEXT: retq +define <4 x float> @v4f32_one_step(<4 x float> %x) #1 { +; AVX-LABEL: v4f32_one_step: +; AVX: # BB#0: +; AVX-NEXT: vrcpps %xmm0, %xmm1 +; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; + %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x + ret <4 x float> %div +} -; REFINE-LABEL: reciprocal_estimate_v4f32: -; REFINE: vrcpps -; REFINE: vmulps -; REFINE: vsubps -; REFINE: vmulps -; REFINE: vaddps -; REFINE: vmulps -; REFINE: vsubps -; REFINE: vmulps -; REFINE: vaddps -; REFINE-NEXT: retq +define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { +; AVX-LABEL: v4f32_two_step: +; AVX: # BB#0: +; AVX-NEXT: vrcpps %xmm0, %xmm1 +; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm2 +; AVX-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vsubps %xmm2, %xmm3, %xmm2 +; AVX-NEXT: vmulps %xmm2, %xmm1, %xmm2 +; AVX-NEXT: vaddps %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vsubps %xmm0, %xmm3, %xmm0 +; AVX-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; + %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x + ret <4 x float> %div } -define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 { 
+define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 { +; AVX-LABEL: v8f32_no_estimate: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0 +; AVX-NEXT: retq +; %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x ret <8 x float> %div +} -; NORECIP-LABEL: reciprocal_estimate_v8f32: -; NORECIP: movaps -; NORECIP: movaps -; NORECIP-NEXT: divps -; NORECIP-NEXT: divps -; NORECIP-NEXT: movaps -; NORECIP-NEXT: movaps -; NORECIP-NEXT: retq - -; RECIP-LABEL: reciprocal_estimate_v8f32: -; RECIP: vrcpps -; RECIP: vmulps -; RECIP: vsubps -; RECIP: vmulps -; RECIP: vaddps -; RECIP-NEXT: retq +define <8 x float> @v8f32_one_step(<8 x float> %x) #1 { +; AVX-LABEL: v8f32_one_step: +; AVX: # BB#0: +; AVX-NEXT: vrcpps %ymm0, %ymm1 +; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vsubps %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; AVX-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX-NEXT: retq +; + %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x + ret <8 x float> %div +} -; REFINE-LABEL: reciprocal_estimate_v8f32: -; REFINE: vrcpps -; REFINE: vmulps -; REFINE: vsubps -; REFINE: vmulps -; REFINE: vaddps -; REFINE: vmulps -; REFINE: vsubps -; REFINE: vmulps -; REFINE: vaddps -; REFINE-NEXT: retq +define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { +; AVX-LABEL: v8f32_two_step: +; AVX: # BB#0: +; AVX-NEXT: vrcpps %ymm0, %ymm1 +; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm2 +; AVX-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vsubps %ymm2, %ymm3, 
%ymm2 +; AVX-NEXT: vmulps %ymm2, %ymm1, %ymm2 +; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1 +; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; AVX-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; AVX-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX-NEXT: retq +; + %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x + ret <8 x float> %div } -attributes #0 = { "unsafe-fp-math"="true" } +attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!divf,!vec-divf" } +attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="divf,vec-divf" } +attributes #2 = { "unsafe-fp-math"="true" "reciprocal-estimates"="divf:2,vec-divf:2" } + diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll index ba43d682268..aec85136768 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2,fma -recip=sqrt:2 -stop-after=expand-isel-pseudos 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2,fma -stop-after=expand-isel-pseudos 2>&1 | FileCheck %s declare float @llvm.sqrt.f32(float) #0 @@ -48,5 +48,5 @@ define float @rfoo(float %f) #0 { ret float %div } -attributes #0 = { "unsafe-fp-math"="true" } +attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" } attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll index 1c6b13026a7..68424c60aa6 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll @@ -1,141 +1,177 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!sqrtf,!vec-sqrtf,!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP -; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=avx -recip=sqrtf,vec-sqrtf | FileCheck %s --check-prefix=ESTIMATE - -declare double @__sqrt_finite(double) #0 -declare float @__sqrtf_finite(float) #0 -declare x86_fp80 @__sqrtl_finite(x86_fp80) #0 -declare float @llvm.sqrt.f32(float) #0 -declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0 -declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0 - - -define double @fd(double %d) #0 { -; NORECIP-LABEL: fd: -; NORECIP: # BB#0: -; NORECIP-NEXT: sqrtsd %xmm0, %xmm0 -; NORECIP-NEXT: retq +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX + +declare double @__sqrt_finite(double) +declare float @__sqrtf_finite(float) +declare x86_fp80 @__sqrtl_finite(x86_fp80) +declare float @llvm.sqrt.f32(float) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) + + +define double @finite_f64_no_estimate(double %d) #0 { +; AVX-LABEL: finite_f64_no_estimate: +; AVX: # BB#0: +; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq ; -; ESTIMATE-LABEL: fd: -; ESTIMATE: # BB#0: -; ESTIMATE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 -; ESTIMATE-NEXT: retq - %call = tail call double @__sqrt_finite(double %d) #1 + %call = tail call double @__sqrt_finite(double %d) #2 ret double %call } +; No estimates for doubles. 
+ +define double @finite_f64_estimate(double %d) #1 { +; AVX-LABEL: finite_f64_estimate: +; AVX: # BB#0: +; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq +; + %call = tail call double @__sqrt_finite(double %d) #2 + ret double %call +} -define float @ff(float %f) #0 { -; NORECIP-LABEL: ff: -; NORECIP: # BB#0: -; NORECIP-NEXT: sqrtss %xmm0, %xmm0 -; NORECIP-NEXT: retq +define float @finite_f32_no_estimate(float %f) #0 { +; AVX-LABEL: finite_f32_no_estimate: +; AVX: # BB#0: +; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq ; -; ESTIMATE-LABEL: ff: -; ESTIMATE: # BB#0: -; ESTIMATE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 -; ESTIMATE-NEXT: vmulss %xmm1, %xmm0, %xmm2 -; ESTIMATE-NEXT: vmulss %xmm1, %xmm2, %xmm1 -; ESTIMATE-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1 -; ESTIMATE-NEXT: vmulss {{.*}}(%rip), %xmm2, %xmm2 -; ESTIMATE-NEXT: vmulss %xmm1, %xmm2, %xmm1 -; ESTIMATE-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; ESTIMATE-NEXT: vcmpeqss %xmm2, %xmm0, %xmm0 -; ESTIMATE-NEXT: vandnps %xmm1, %xmm0, %xmm0 -; ESTIMATE-NEXT: retq - %call = tail call float @__sqrtf_finite(float %f) #1 + %call = tail call float @__sqrtf_finite(float %f) #2 ret float %call } +define float @finite_f32_estimate(float %f) #1 { +; AVX-LABEL: finite_f32_estimate: +; AVX: # BB#0: +; AVX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm2 +; AVX-NEXT: vmulss %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vmulss {{.*}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: vmulss %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vcmpeqss %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vandnps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; + %call = tail call float @__sqrtf_finite(float %f) #2 + ret float %call +} -define x86_fp80 @fld(x86_fp80 %ld) #0 { -; NORECIP-LABEL: fld: -; NORECIP: # BB#0: -; NORECIP-NEXT: fldt {{[0-9]+}}(%rsp) -; NORECIP-NEXT: fsqrt -; NORECIP-NEXT: retq +define x86_fp80 @finite_f80_no_estimate(x86_fp80 %ld) #0 { +; AVX-LABEL: 
finite_f80_no_estimate: +; AVX: # BB#0: +; AVX-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX-NEXT: fsqrt +; AVX-NEXT: retq ; -; ESTIMATE-LABEL: fld: -; ESTIMATE: # BB#0: -; ESTIMATE-NEXT: fldt {{[0-9]+}}(%rsp) -; ESTIMATE-NEXT: fsqrt -; ESTIMATE-NEXT: retq - %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #1 + %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2 ret x86_fp80 %call } +; Don't die on the impossible. +define x86_fp80 @finite_f80_estimate_but_no(x86_fp80 %ld) #1 { +; AVX-LABEL: finite_f80_estimate_but_no: +; AVX: # BB#0: +; AVX-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX-NEXT: fsqrt +; AVX-NEXT: retq +; + %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2 + ret x86_fp80 %call +} -define float @reciprocal_square_root(float %x) #0 { -; NORECIP-LABEL: reciprocal_square_root: -; NORECIP: # BB#0: -; NORECIP-NEXT: sqrtss %xmm0, %xmm1 -; NORECIP-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; NORECIP-NEXT: divss %xmm1, %xmm0 -; NORECIP-NEXT: retq +define float @f32_no_estimate(float %x) #0 { +; AVX-LABEL: f32_no_estimate: +; AVX: # BB#0: +; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq ; -; ESTIMATE-LABEL: reciprocal_square_root: -; ESTIMATE: # BB#0: -; ESTIMATE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 -; ESTIMATE-NEXT: vmulss %xmm1, %xmm1, %xmm2 -; ESTIMATE-NEXT: vmulss %xmm2, %xmm0, %xmm0 -; ESTIMATE-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 -; ESTIMATE-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1 -; ESTIMATE-NEXT: vmulss %xmm0, %xmm1, %xmm0 -; ESTIMATE-NEXT: retq %sqrt = tail call float @llvm.sqrt.f32(float %x) %div = fdiv fast float 1.0, %sqrt ret float %div } -define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 { -; NORECIP-LABEL: reciprocal_square_root_v4f32: -; NORECIP: # BB#0: -; NORECIP-NEXT: sqrtps %xmm0, %xmm1 -; NORECIP-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] -; NORECIP-NEXT: divps 
%xmm1, %xmm0 -; NORECIP-NEXT: retq +define float @f32_estimate(float %x) #1 { +; AVX-LABEL: f32_estimate: +; AVX: # BB#0: +; AVX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vmulss %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; + %sqrt = tail call float @llvm.sqrt.f32(float %x) + %div = fdiv fast float 1.0, %sqrt + ret float %div +} + +define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 { +; AVX-LABEL: v4f32_no_estimate: +; AVX: # BB#0: +; AVX-NEXT: vsqrtps %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq ; -; ESTIMATE-LABEL: reciprocal_square_root_v4f32: -; ESTIMATE: # BB#0: -; ESTIMATE-NEXT: vrsqrtps %xmm0, %xmm1 -; ESTIMATE-NEXT: vmulps %xmm1, %xmm1, %xmm2 -; ESTIMATE-NEXT: vmulps %xmm2, %xmm0, %xmm0 -; ESTIMATE-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 -; ESTIMATE-NEXT: vmulps {{.*}}(%rip), %xmm1, %xmm1 -; ESTIMATE-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; ESTIMATE-NEXT: retq %sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt ret <4 x float> %div } -define <8 x float> @reciprocal_square_root_v8f32(<8 x float> %x) #0 { -; NORECIP-LABEL: reciprocal_square_root_v8f32: -; NORECIP: # BB#0: -; NORECIP-NEXT: sqrtps %xmm1, %xmm2 -; NORECIP-NEXT: sqrtps %xmm0, %xmm3 -; NORECIP-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] -; NORECIP-NEXT: movaps %xmm1, %xmm0 -; NORECIP-NEXT: divps %xmm3, %xmm0 -; NORECIP-NEXT: divps %xmm2, %xmm1 -; NORECIP-NEXT: retq +define <4 x float> @v4f32_estimate(<4 x float> %x) #1 { +; AVX-LABEL: v4f32_estimate: +; AVX: # BB#0: +; AVX-NEXT: vrsqrtps %xmm0, %xmm1 +; AVX-NEXT: vmulps %xmm1, %xmm1, %xmm2 +; AVX-NEXT: vmulps %xmm2, %xmm0, %xmm0 +; 
AVX-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmulps {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; + %sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) + %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt + ret <4 x float> %div +} + +define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 { +; AVX-LABEL: v8f32_no_estimate: +; AVX: # BB#0: +; AVX-NEXT: vsqrtps %ymm0, %ymm0 +; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0 +; AVX-NEXT: retq +; + %sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x) + %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt + ret <8 x float> %div +} + +define <8 x float> @v8f32_estimate(<8 x float> %x) #1 { +; AVX-LABEL: v8f32_estimate: +; AVX: # BB#0: +; AVX-NEXT: vrsqrtps %ymm0, %ymm1 +; AVX-NEXT: vmulps %ymm1, %ymm1, %ymm2 +; AVX-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; AVX-NEXT: vaddps {{.*}}(%rip), %ymm0, %ymm0 +; AVX-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; AVX-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; AVX-NEXT: retq ; -; ESTIMATE-LABEL: reciprocal_square_root_v8f32: -; ESTIMATE: # BB#0: -; ESTIMATE-NEXT: vrsqrtps %ymm0, %ymm1 -; ESTIMATE-NEXT: vmulps %ymm1, %ymm1, %ymm2 -; ESTIMATE-NEXT: vmulps %ymm2, %ymm0, %ymm0 -; ESTIMATE-NEXT: vaddps {{.*}}(%rip), %ymm0, %ymm0 -; ESTIMATE-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 -; ESTIMATE-NEXT: vmulps %ymm0, %ymm1, %ymm0 -; ESTIMATE-NEXT: retq %sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x) %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt ret <8 x float> %div } -attributes #0 = { "unsafe-fp-math"="true" } -attributes #1 = { nounwind readnone } +attributes #0 = { "unsafe-fp-math"="true" 
"reciprocal-estimates"="!sqrtf,!vec-sqrtf,!divf,!vec-divf" } +attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" } +attributes #2 = { nounwind readnone } |

