Expose the number of Newton-Raphson iterations applied to the hardware's reciprocal estimate as a parameter (x86).

This is a follow-on to r221706 and r221731, and is discussed in more detail in PR21385.

This patch also loosens the testcase checking for btver2. We know that the "1.0" will be loaded, but we can't tell exactly when, so replace the CHECK-NEXT specifiers with plain CHECKs. The CHECK-NEXT sequence relied on a quirk of post-RA scheduling that may change independently of anything in these tests.

llvm-svn: 221819
author: Sanjay Patel <spatel@rotateright.com> 2014-11-12 21:39:01 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2014-11-12 21:39:01 +0000
commit: f6f7d5d1dd0dae8c2f45aab6f2d96a3f31a256fa (patch)
tree: cf64baa20863e81c2a5fb4bb3a1f0406fcb2ad30 /llvm/test/CodeGen/X86/recip-fastmath.ll
parent: 6d829bdbef0da4ebb626fa793652c599e9826ed8 (diff)
download: bcm5719-llvm-f6f7d5d1dd0dae8c2f45aab6f2d96a3f31a256fa.tar.gz
bcm5719-llvm-f6f7d5d1dd0dae8c2f45aab6f2d96a3f31a256fa.zip
1 file changed, 49 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll
index dd5563c965f..83b86accdb3 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+use-recip-est,+avx -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
 
 ; If the target's divss/divps instructions are substantially
 ; slower than rcpss/rcpps with a Newton-Raphson refinement,
@@ -21,11 +22,23 @@ define float @reciprocal_estimate(float %x) #0 {
 
 ; BTVER2-LABEL: reciprocal_estimate:
 ; BTVER2: vrcpss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vsubss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vaddss
+; BTVER2: vmulss
+; BTVER2: vsubss
+; BTVER2: vmulss
+; BTVER2: vaddss
 ; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate:
+; REFINE: vrcpss
+; REFINE: vmulss
+; REFINE: vsubss
+; REFINE: vmulss
+; REFINE: vaddss
+; REFINE: vmulss
+; REFINE: vsubss
+; REFINE: vmulss
+; REFINE: vaddss
+; REFINE-NEXT: retq
 }
 
 define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
@@ -40,11 +53,23 @@ define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
 
 ; BTVER2-LABEL: reciprocal_estimate_v4f32:
 ; BTVER2: vrcpps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vsubps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vaddps
+; BTVER2: vmulps
+; BTVER2: vsubps
+; BTVER2: vmulps
+; BTVER2: vaddps
 ; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate_v4f32:
+; REFINE: vrcpps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE-NEXT: retq
 }
 
 define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
@@ -62,11 +87,23 @@ define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
 
 ; BTVER2-LABEL: reciprocal_estimate_v8f32:
 ; BTVER2: vrcpps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vsubps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vaddps
+; BTVER2: vmulps
+; BTVER2: vsubps
+; BTVER2: vmulps
+; BTVER2: vaddps
 ; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate_v8f32:
+; REFINE: vrcpps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE-NEXT: retq
 }
 
 attributes #0 = { "unsafe-fp-math"="true" }
author	Sanjay Patel <spatel@rotateright.com>	2014-11-12 21:39:01 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2014-11-12 21:39:01 +0000
commit	f6f7d5d1dd0dae8c2f45aab6f2d96a3f31a256fa (patch)
tree	cf64baa20863e81c2a5fb4bb3a1f0406fcb2ad30 /llvm/test/CodeGen/X86/recip-fastmath.ll
parent	6d829bdbef0da4ebb626fa793652c599e9826ed8 (diff)
download	bcm5719-llvm-f6f7d5d1dd0dae8c2f45aab6f2d96a3f31a256fa.tar.gz bcm5719-llvm-f6f7d5d1dd0dae8c2f45aab6f2d96a3f31a256fa.zip