diff options
author | Sanjay Patel <spatel@rotateright.com> | 2014-11-12 21:39:01 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2014-11-12 21:39:01 +0000 |
commit | f6f7d5d1dd0dae8c2f45aab6f2d96a3f31a256fa (patch) | |
tree | cf64baa20863e81c2a5fb4bb3a1f0406fcb2ad30 /llvm/test/CodeGen/X86/recip-fastmath.ll | |
parent | 6d829bdbef0da4ebb626fa793652c599e9826ed8 (diff) | |
download | bcm5719-llvm-f6f7d5d1dd0dae8c2f45aab6f2d96a3f31a256fa.tar.gz bcm5719-llvm-f6f7d5d1dd0dae8c2f45aab6f2d96a3f31a256fa.zip |
Expose the number of Newton-Raphson iterations applied to the hardware's reciprocal estimate as a parameter (x86).
This is a follow-on to r221706 and r221731, and it is discussed in more detail in PR21385.
This patch also loosens the testcase checking for btver2. We know that the "1.0" will be loaded, but
we can't tell exactly when, so replace the CHECK-NEXT specifiers with plain CHECKs. The CHECK-NEXT
sequence relied on a quirk of post-RA-scheduling that may change independently of anything in these tests.
llvm-svn: 221819
Diffstat (limited to 'llvm/test/CodeGen/X86/recip-fastmath.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/recip-fastmath.ll | 61 |
1 files changed, 49 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll index dd5563c965f..83b86accdb3 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+use-recip-est,+avx -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE ; If the target's divss/divps instructions are substantially ; slower than rcpss/rcpps with a Newton-Raphson refinement, @@ -21,11 +22,23 @@ define float @reciprocal_estimate(float %x) #0 { ; BTVER2-LABEL: reciprocal_estimate: ; BTVER2: vrcpss -; BTVER2-NEXT: vmulss -; BTVER2-NEXT: vsubss -; BTVER2-NEXT: vmulss -; BTVER2-NEXT: vaddss +; BTVER2: vmulss +; BTVER2: vsubss +; BTVER2: vmulss +; BTVER2: vaddss ; BTVER2-NEXT: retq + +; REFINE-LABEL: reciprocal_estimate: +; REFINE: vrcpss +; REFINE: vmulss +; REFINE: vsubss +; REFINE: vmulss +; REFINE: vaddss +; REFINE: vmulss +; REFINE: vsubss +; REFINE: vmulss +; REFINE: vaddss +; REFINE-NEXT: retq } define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 { @@ -40,11 +53,23 @@ define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 { ; BTVER2-LABEL: reciprocal_estimate_v4f32: ; BTVER2: vrcpps -; BTVER2-NEXT: vmulps -; BTVER2-NEXT: vsubps -; BTVER2-NEXT: vmulps -; BTVER2-NEXT: vaddps +; BTVER2: vmulps +; BTVER2: vsubps +; BTVER2: vmulps +; BTVER2: vaddps ; BTVER2-NEXT: retq + +; REFINE-LABEL: reciprocal_estimate_v4f32: +; REFINE: vrcpps +; REFINE: vmulps +; REFINE: vsubps +; REFINE: vmulps +; REFINE: vaddps +; REFINE: vmulps +; REFINE: vsubps +; REFINE: vmulps +; REFINE: vaddps +; REFINE-NEXT: retq } define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 { @@ -62,11 +87,23 @@ define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 { ; 
BTVER2-LABEL: reciprocal_estimate_v8f32: ; BTVER2: vrcpps -; BTVER2-NEXT: vmulps -; BTVER2-NEXT: vsubps -; BTVER2-NEXT: vmulps -; BTVER2-NEXT: vaddps +; BTVER2: vmulps +; BTVER2: vsubps +; BTVER2: vmulps +; BTVER2: vaddps ; BTVER2-NEXT: retq + +; REFINE-LABEL: reciprocal_estimate_v8f32: +; REFINE: vrcpps +; REFINE: vmulps +; REFINE: vsubps +; REFINE: vmulps +; REFINE: vaddps +; REFINE: vmulps +; REFINE: vsubps +; REFINE: vmulps +; REFINE: vaddps +; REFINE-NEXT: retq } attributes #0 = { "unsafe-fp-math"="true" } |