From f6f7d5d1dd0dae8c2f45aab6f2d96a3f31a256fa Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 12 Nov 2014 21:39:01 +0000
Subject: Expose the number of Newton-Raphson iterations applied to the hardware's reciprocal estimate as a parameter (x86).

This is a follow-on to r221706 and r221731 and discussed in more detail in PR21385.

This patch also loosens the testcase checking for btver2. We know that the "1.0" will be loaded, but we can't tell exactly when, so replace the CHECK-NEXT specifiers with plain CHECKs. The CHECK-NEXT sequence relied on a quirk of post-RA-scheduling that may change independently of anything in these tests.

llvm-svn: 221819
---
 llvm/test/CodeGen/X86/recip-fastmath.ll | 61 ++++++++++++++++++++++++++-------
 1 file changed, 49 insertions(+), 12 deletions(-)

(limited to 'llvm/test/CodeGen/X86/recip-fastmath.ll')

diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll
index dd5563c965f..83b86accdb3 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+use-recip-est,+avx -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
 
 ; If the target's divss/divps instructions are substantially
 ; slower than rcpss/rcpps with a Newton-Raphson refinement,
@@ -21,11 +22,23 @@ define float @reciprocal_estimate(float %x) #0 {
 
 ; BTVER2-LABEL: reciprocal_estimate:
 ; BTVER2: vrcpss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vsubss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vaddss
+; BTVER2: vmulss
+; BTVER2: vsubss
+; BTVER2: vmulss
+; BTVER2: vaddss
 ; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate:
+; REFINE: vrcpss
+; REFINE: vmulss
+; REFINE: vsubss
+; REFINE: vmulss
+; REFINE: vaddss
+; REFINE: vmulss
+; REFINE: vsubss
+; REFINE: vmulss
+; REFINE: vaddss
+; REFINE-NEXT: retq
 }
 
 define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
@@ -40,11 +53,23 @@ define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
 
 ; BTVER2-LABEL: reciprocal_estimate_v4f32:
 ; BTVER2: vrcpps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vsubps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vaddps
+; BTVER2: vmulps
+; BTVER2: vsubps
+; BTVER2: vmulps
+; BTVER2: vaddps
 ; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate_v4f32:
+; REFINE: vrcpps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE-NEXT: retq
 }
 
 define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
@@ -62,11 +87,23 @@ define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
 
 ; BTVER2-LABEL: reciprocal_estimate_v8f32:
 ; BTVER2: vrcpps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vsubps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vaddps
+; BTVER2: vmulps
+; BTVER2: vsubps
+; BTVER2: vmulps
+; BTVER2: vaddps
 ; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate_v8f32:
+; REFINE: vrcpps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE-NEXT: retq
 }
 
 attributes #0 = { "unsafe-fp-math"="true" }
-- 
cgit v1.2.3