diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/recip-fastmath.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/recip-fastmath.ll | 205 |
1 files changed, 123 insertions, 82 deletions
diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll index 8e02dad9d5a..bd622d0442e 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath.ll @@ -1,6 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX ; If the target's divss/divps instructions are substantially ; slower than rcpss/rcpps with a Newton-Raphson refinement, @@ -10,100 +9,142 @@ ; for details about the accuracy, speed, and implementation ; differences of x86 reciprocal estimates. -define float @reciprocal_estimate(float %x) #0 { +define float @f32_no_estimate(float %x) #0 { +; AVX-LABEL: f32_no_estimate: +; AVX: # BB#0: +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; %div = fdiv fast float 1.0, %x ret float %div +} -; NORECIP-LABEL: reciprocal_estimate: -; NORECIP: movss -; NORECIP-NEXT: divss -; NORECIP-NEXT: movaps -; NORECIP-NEXT: retq - -; RECIP-LABEL: reciprocal_estimate: -; RECIP: vrcpss -; RECIP: vmulss -; RECIP: vsubss -; RECIP: vmulss -; RECIP: vaddss -; RECIP-NEXT: retq +define float @f32_one_step(float %x) #1 { +; AVX-LABEL: f32_one_step: +; AVX: # BB#0: +; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; + %div = fdiv fast float 1.0, %x + ret float %div +} -; REFINE-LABEL: reciprocal_estimate: -; REFINE: vrcpss -; REFINE: vmulss -; REFINE: vsubss -; REFINE: vmulss -; REFINE: vaddss -; REFINE: vmulss -; REFINE: vsubss -; REFINE: vmulss -; REFINE: vaddss -; REFINE-NEXT: retq +define float @f32_two_step(float %x) #2 { +; AVX-LABEL: f32_two_step: +; AVX: # BB#0: +; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm2 +; AVX-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm2, %xmm3, %xmm2 +; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm2 +; AVX-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vsubss %xmm0, %xmm3, %xmm0 +; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; + %div = fdiv fast float 1.0, %x + ret float %div } -define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 { +define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 { +; AVX-LABEL: v4f32_no_estimate: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x ret <4 x float> %div +} -; NORECIP-LABEL: reciprocal_estimate_v4f32: -; NORECIP: movaps -; NORECIP-NEXT: divps -; NORECIP-NEXT: movaps -; NORECIP-NEXT: retq - -; RECIP-LABEL: reciprocal_estimate_v4f32: -; RECIP: vrcpps -; RECIP: vmulps -; RECIP: vsubps -; RECIP: vmulps -; RECIP: vaddps -; RECIP-NEXT: retq +define <4 x float> @v4f32_one_step(<4 x float> %x) #1 { +; AVX-LABEL: v4f32_one_step: +; AVX: # BB#0: +; AVX-NEXT: vrcpps %xmm0, %xmm1 +; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; + %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x + ret <4 x float> %div +} -; REFINE-LABEL: reciprocal_estimate_v4f32: -; REFINE: vrcpps -; REFINE: vmulps -; REFINE: vsubps -; REFINE: vmulps -; REFINE: vaddps -; REFINE: vmulps -; REFINE: vsubps -; REFINE: vmulps -; REFINE: vaddps -; REFINE-NEXT: retq +define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { +; AVX-LABEL: v4f32_two_step: +; AVX: # BB#0: +; AVX-NEXT: vrcpps %xmm0, %xmm1 +; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm2 +; AVX-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vsubps %xmm2, %xmm3, %xmm2 +; AVX-NEXT: vmulps %xmm2, %xmm1, %xmm2 +; AVX-NEXT: vaddps %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vsubps %xmm0, %xmm3, %xmm0 +; AVX-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq +; + %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x + ret <4 x float> %div } -define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 { +define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 { +; AVX-LABEL: v8f32_no_estimate: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0 +; AVX-NEXT: retq +; %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x ret <8 x float> %div +} -; NORECIP-LABEL: reciprocal_estimate_v8f32: -; NORECIP: movaps -; NORECIP: movaps -; NORECIP-NEXT: divps -; NORECIP-NEXT: divps -; NORECIP-NEXT: movaps -; NORECIP-NEXT: movaps -; NORECIP-NEXT: retq - -; RECIP-LABEL: reciprocal_estimate_v8f32: -; RECIP: vrcpps -; RECIP: vmulps -; RECIP: vsubps -; RECIP: vmulps -; RECIP: vaddps -; RECIP-NEXT: retq +define <8 x float> @v8f32_one_step(<8 x float> %x) #1 { +; AVX-LABEL: v8f32_one_step: +; AVX: # BB#0: +; AVX-NEXT: vrcpps %ymm0, %ymm1 +; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vsubps %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; AVX-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX-NEXT: retq +; + %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x + ret <8 x float> %div +} -; REFINE-LABEL: reciprocal_estimate_v8f32: -; REFINE: vrcpps -; REFINE: vmulps -; REFINE: vsubps -; REFINE: vmulps -; REFINE: vaddps -; REFINE: vmulps -; REFINE: vsubps -; REFINE: vmulps -; REFINE: vaddps -; REFINE-NEXT: retq +define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { +; AVX-LABEL: v8f32_two_step: +; AVX: # BB#0: +; AVX-NEXT: vrcpps %ymm0, %ymm1 +; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm2 +; AVX-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; AVX-NEXT: vsubps %ymm2, %ymm3, %ymm2 +; AVX-NEXT: vmulps %ymm2, %ymm1, %ymm2 +; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1 +; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; AVX-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; AVX-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX-NEXT: retq +; + %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x + ret <8 x float> %div } -attributes #0 = { "unsafe-fp-math"="true" } +attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!divf,!vec-divf" } +attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="divf,vec-divf" } +attributes #2 = { "unsafe-fp-math"="true" "reciprocal-estimates"="divf:2,vec-divf:2" } + |