summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/X86/recip-fastmath.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/recip-fastmath.ll')
-rw-r--r-- llvm/test/CodeGen/X86/recip-fastmath.ll 180
1 file changed, 84 insertions, 96 deletions
diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll
index c7442f2bc20..c618c37e4fe 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath.ll
@@ -122,88 +122,87 @@ define float @f32_one_step_variables(float %x, float %y) #1 {
; SSE-LABEL: f32_one_step_variables:
; SSE: # %bb.0:
; SSE-NEXT: rcpss %xmm1, %xmm2
-; SSE-NEXT: mulss %xmm2, %xmm1
-; SSE-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE-NEXT: subss %xmm1, %xmm3
+; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: mulss %xmm2, %xmm3
-; SSE-NEXT: addss %xmm2, %xmm3
-; SSE-NEXT: mulss %xmm3, %xmm0
+; SSE-NEXT: mulss %xmm3, %xmm1
+; SSE-NEXT: subss %xmm1, %xmm0
+; SSE-NEXT: mulss %xmm2, %xmm0
+; SSE-NEXT: addss %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: f32_one_step_variables:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpss %xmm1, %xmm1, %xmm2
-; AVX-RECIP-NEXT: vmulss %xmm2, %xmm1, %xmm1
-; AVX-RECIP-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; AVX-RECIP-NEXT: vsubss %xmm1, %xmm3, %xmm1
-; AVX-RECIP-NEXT: vmulss %xmm1, %xmm2, %xmm1
-; AVX-RECIP-NEXT: vaddss %xmm1, %xmm2, %xmm1
-; AVX-RECIP-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; AVX-RECIP-NEXT: vmulss %xmm2, %xmm0, %xmm3
+; AVX-RECIP-NEXT: vmulss %xmm3, %xmm1, %xmm1
+; AVX-RECIP-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX-RECIP-NEXT: vmulss %xmm0, %xmm2, %xmm0
+; AVX-RECIP-NEXT: vaddss %xmm0, %xmm3, %xmm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: f32_one_step_variables:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpss %xmm1, %xmm1, %xmm2
-; FMA-RECIP-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm2 * xmm1) + mem
-; FMA-RECIP-NEXT: vfmadd132ss {{.*#+}} xmm1 = (xmm1 * xmm2) + xmm2
-; FMA-RECIP-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; FMA-RECIP-NEXT: vmulss %xmm2, %xmm0, %xmm3
+; FMA-RECIP-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
+; FMA-RECIP-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm1 * xmm2) + xmm3
+; FMA-RECIP-NEXT: vmovaps %xmm2, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: f32_one_step_variables:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm2
-; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm2, %xmm1, %xmm1
-; BDVER2-NEXT: vfmaddss %xmm2, %xmm1, %xmm2, %xmm1
-; BDVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vmulss %xmm2, %xmm0, %xmm3
+; BDVER2-NEXT: vfnmaddss %xmm0, %xmm3, %xmm1, %xmm0
+; BDVER2-NEXT: vfmaddss %xmm3, %xmm0, %xmm2, %xmm0
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_one_step_variables:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; BTVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm2
-; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm1
-; BTVER2-NEXT: vsubss %xmm1, %xmm3, %xmm1
-; BTVER2-NEXT: vmulss %xmm1, %xmm2, %xmm1
-; BTVER2-NEXT: vaddss %xmm1, %xmm2, %xmm1
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vmulss %xmm2, %xmm0, %xmm3
+; BTVER2-NEXT: vmulss %xmm3, %xmm1, %xmm1
+; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vmulss %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vaddss %xmm0, %xmm3, %xmm0
; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_one_step_variables:
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm2
-; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm1
-; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SANDY-NEXT: vsubss %xmm1, %xmm3, %xmm1
-; SANDY-NEXT: vmulss %xmm1, %xmm2, %xmm1
-; SANDY-NEXT: vaddss %xmm1, %xmm2, %xmm1
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmulss %xmm2, %xmm0, %xmm3
+; SANDY-NEXT: vmulss %xmm3, %xmm1, %xmm1
+; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmulss %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vaddss %xmm0, %xmm3, %xmm0
; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_one_step_variables:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm2
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm2 * xmm1) + mem
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm1 = (xmm1 * xmm2) + xmm2
-; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; HASWELL-NEXT: vmulss %xmm2, %xmm0, %xmm3
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
+; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm1 * xmm2) + xmm3
+; HASWELL-NEXT: vmovaps %xmm2, %xmm0
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_one_step_variables:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpss %xmm1, %xmm1, %xmm2
-; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm1
-; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; HASWELL-NO-FMA-NEXT: vsubss %xmm1, %xmm3, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm2, %xmm1
-; HASWELL-NO-FMA-NEXT: vaddss %xmm1, %xmm2, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm0, %xmm3
+; HASWELL-NO-FMA-NEXT: vmulss %xmm3, %xmm1, %xmm1
+; HASWELL-NO-FMA-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm2, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm3, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
; AVX512-LABEL: f32_one_step_variables:
; AVX512: # %bb.0:
; AVX512-NEXT: vrcpss %xmm1, %xmm1, %xmm2
-; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm2 * xmm1) + mem
-; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm1 = (xmm1 * xmm2) + xmm2
-; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm3
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
+; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm1 * xmm2) + xmm3
+; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%div = fdiv fast float %x, %y
ret float %div
@@ -484,99 +483,88 @@ define <4 x float> @v4f32_one_step_variables(<4 x float> %x, <4 x float> %y) #1
; SSE-LABEL: v4f32_one_step_variables:
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm1, %xmm2
-; SSE-NEXT: mulps %xmm2, %xmm1
-; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; SSE-NEXT: subps %xmm1, %xmm3
+; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: mulps %xmm2, %xmm3
-; SSE-NEXT: addps %xmm2, %xmm3
-; SSE-NEXT: mulps %xmm3, %xmm0
+; SSE-NEXT: mulps %xmm3, %xmm1
+; SSE-NEXT: subps %xmm1, %xmm0
+; SSE-NEXT: mulps %xmm2, %xmm0
+; SSE-NEXT: addps %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: v4f32_one_step_variables:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %xmm1, %xmm2
-; AVX-RECIP-NEXT: vmulps %xmm2, %xmm1, %xmm1
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; AVX-RECIP-NEXT: vsubps %xmm1, %xmm3, %xmm1
-; AVX-RECIP-NEXT: vmulps %xmm1, %xmm2, %xmm1
-; AVX-RECIP-NEXT: vaddps %xmm1, %xmm2, %xmm1
-; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-RECIP-NEXT: vmulps %xmm2, %xmm0, %xmm3
+; AVX-RECIP-NEXT: vmulps %xmm3, %xmm1, %xmm1
+; AVX-RECIP-NEXT: vsubps %xmm1, %xmm0, %xmm0
+; AVX-RECIP-NEXT: vmulps %xmm0, %xmm2, %xmm0
+; AVX-RECIP-NEXT: vaddps %xmm0, %xmm3, %xmm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v4f32_one_step_variables:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %xmm1, %xmm2
-; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm1 = -(xmm2 * xmm1) + mem
-; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm1 = (xmm1 * xmm2) + xmm2
-; FMA-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; FMA-RECIP-NEXT: vmulps %xmm2, %xmm0, %xmm3
+; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
+; FMA-RECIP-NEXT: vfmadd213ps {{.*#+}} xmm2 = (xmm1 * xmm2) + xmm3
+; FMA-RECIP-NEXT: vmovaps %xmm2, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v4f32_one_step_variables:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %xmm1, %xmm2
-; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm2, %xmm1, %xmm1
-; BDVER2-NEXT: vfmaddps %xmm2, %xmm1, %xmm2, %xmm1
-; BDVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vmulps %xmm2, %xmm0, %xmm3
+; BDVER2-NEXT: vfnmaddps %xmm0, %xmm3, %xmm1, %xmm0
+; BDVER2-NEXT: vfmaddps %xmm3, %xmm0, %xmm2, %xmm0
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_one_step_variables:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BTVER2-NEXT: vrcpps %xmm1, %xmm2
-; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm1
-; BTVER2-NEXT: vsubps %xmm1, %xmm3, %xmm1
-; BTVER2-NEXT: vmulps %xmm1, %xmm2, %xmm1
-; BTVER2-NEXT: vaddps %xmm1, %xmm2, %xmm1
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vmulps %xmm2, %xmm0, %xmm3
+; BTVER2-NEXT: vmulps %xmm3, %xmm1, %xmm1
+; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vmulps %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vaddps %xmm0, %xmm3, %xmm0
; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_one_step_variables:
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %xmm1, %xmm2
-; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm1
-; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; SANDY-NEXT: vsubps %xmm1, %xmm3, %xmm1
-; SANDY-NEXT: vmulps %xmm1, %xmm2, %xmm1
-; SANDY-NEXT: vaddps %xmm1, %xmm2, %xmm1
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmulps %xmm2, %xmm0, %xmm3
+; SANDY-NEXT: vmulps %xmm3, %xmm1, %xmm1
+; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmulps %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vaddps %xmm0, %xmm3, %xmm0
; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_one_step_variables:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %xmm1, %xmm2
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm1 = -(xmm2 * xmm1) + xmm3
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm1 = (xmm1 * xmm2) + xmm2
-; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; HASWELL-NEXT: vmulps %xmm2, %xmm0, %xmm3
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
+; HASWELL-NEXT: vfmadd213ps {{.*#+}} xmm2 = (xmm1 * xmm2) + xmm3
+; HASWELL-NEXT: vmovaps %xmm2, %xmm0
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step_variables:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm1, %xmm2
-; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm1
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; HASWELL-NO-FMA-NEXT: vsubps %xmm1, %xmm3, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm2, %xmm1
-; HASWELL-NO-FMA-NEXT: vaddps %xmm1, %xmm2, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm0, %xmm3
+; HASWELL-NO-FMA-NEXT: vmulps %xmm3, %xmm1, %xmm1
+; HASWELL-NO-FMA-NEXT: vsubps %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm2, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm3, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v4f32_one_step_variables:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %xmm1, %xmm2
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm1 = -(xmm2 * xmm1) + xmm3
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm1 = (xmm1 * xmm2) + xmm2
-; KNL-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; KNL-NEXT: retq
-;
-; SKX-LABEL: v4f32_one_step_variables:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %xmm1, %xmm2
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm1 = -(xmm2 * xmm1) + mem
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm1 = (xmm1 * xmm2) + xmm2
-; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; SKX-NEXT: retq
+; AVX512-LABEL: v4f32_one_step_variables:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpps %xmm1, %xmm2
+; AVX512-NEXT: vmulps %xmm2, %xmm0, %xmm3
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
+; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm2 = (xmm1 * xmm2) + xmm3
+; AVX512-NEXT: vmovaps %xmm2, %xmm0
+; AVX512-NEXT: retq
%div = fdiv fast <4 x float> %x, %y
ret <4 x float> %div
}
OpenPOWER on IntegriCloud