diff options
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fma-combine.ll | 200 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fma_patterns.ll | 305 |
3 files changed, 506 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll index bd574b87711..6f3437048ed 100644 --- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll @@ -364,5 +364,205 @@ define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias % ret void } +; +; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y) +; + +; FUNC-LABEL: {{^}}test_f32_mul_add_x_one_y: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]] +define void @test_f32_mul_add_x_one_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, 1.0 + %m = fmul float %a, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_one: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]] +define void @test_f32_mul_y_add_x_one(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, 1.0 + %m = fmul float %y, %a + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_add_x_negone_y: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, -1.0 + %m = fmul float %a, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_negone: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float 
addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, -1.0 + %m = fmul float %y, %a + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_one_x_y: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]] +define void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float 1.0, %x + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_one_x: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]] +define void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float 1.0, %x + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_negone_x_y: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float -1.0, %x + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_negone_x: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float -1.0, %x + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_x_one_y: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], 
[[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, 1.0 + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_one: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, 1.0 + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_x_negone_y: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]] +define void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, -1.0 + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_negone: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]] +define void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, -1.0 + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; +; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y)) +; + +; FUNC-LABEL: {{^}}test_f32_interp: +; SI: v_mad_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]] +; SI: v_mac_f32_e32 [[VR]], [[VT]], [[VX:v[0-9]]] +define void @test_f32_interp(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2, + float addrspace(1)* 
%in3) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %t = load float, float addrspace(1)* %in3 + %t1 = fsub float 1.0, %t + %tx = fmul float %x, %t + %ty = fmul float %y, %t1 + %r = fadd float %tx, %ty + store float %r, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f64_interp: +; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VY]] +; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VR]] +define void @test_f64_interp(double addrspace(1)* %out, + double addrspace(1)* %in1, + double addrspace(1)* %in2, + double addrspace(1)* %in3) { + %x = load double, double addrspace(1)* %in1 + %y = load double, double addrspace(1)* %in2 + %t = load double, double addrspace(1)* %in3 + %t1 = fsub double 1.0, %t + %tx = fmul double %x, %t + %ty = fmul double %y, %t1 + %r = fadd double %tx, %ty + store double %r, double addrspace(1)* %out + ret void +} + attributes #0 = { nounwind readnone } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll index a64dd0ebd2d..0c3e4ecaa1a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll @@ -4,7 +4,7 @@ declare float @llvm.AMDGPU.lrp(float, float, float) nounwind readnone ; FUNC-LABEL: {{^}}test_lrp: -; SI: v_sub_f32 +; SI: v_mad_f32 ; SI: v_mac_f32_e32 define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind { %mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll index 4aab2118b76..0696f7c3533 100644 --- a/llvm/test/CodeGen/X86/fma_patterns.ll +++ b/llvm/test/CodeGen/X86/fma_patterns.ll @@ -2,6 +2,10 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s ; 
RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=CHECK_FMA4 +; +; Patterns (+ fneg variants): add(mul(x,y),z), sub(mul(x,y),z) +; + define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; CHECK-LABEL: test_x86_fmadd_ps: ; CHECK: # BB#0: @@ -264,3 +268,304 @@ define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 ret <4 x float> %res } +; +; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y) +; + +define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_add_x_one_y: +; CHECK: # BB#0: +; CHECK-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_add_x_one_y: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> + %m = fmul <4 x float> %a, %y + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_y_add_x_one: +; CHECK: # BB#0: +; CHECK-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_y_add_x_one: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> + %m = fmul <4 x float> %y, %a + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_add_x_negone_y: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_add_x_negone_y: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float 
-1.0> + %m = fmul <4 x float> %a, %y + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_y_add_x_negone: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_y_add_x_negone: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> + %m = fmul <4 x float> %y, %a + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_sub_one_x_y: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_sub_one_x_y: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x + %m = fmul <4 x float> %s, %y + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_y_sub_one_x: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_one_x: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x + %m = fmul <4 x float> %y, %s + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_sub_negone_x_y: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_sub_negone_x_y: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> <float -1.0, float -1.0, 
float -1.0, float -1.0>, %x + %m = fmul <4 x float> %s, %y + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_y_sub_negone_x: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_negone_x: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x + %m = fmul <4 x float> %y, %s + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_sub_x_one_y: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_sub_x_one_y: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> + %m = fmul <4 x float> %s, %y + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_y_sub_x_one: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_x_one: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> + %m = fmul <4 x float> %y, %s + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_sub_x_negone_y: +; CHECK: # BB#0: +; CHECK-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_sub_x_negone_y: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> %x, 
<float -1.0, float -1.0, float -1.0, float -1.0> + %m = fmul <4 x float> %s, %y + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_y_sub_x_negone: +; CHECK: # BB#0: +; CHECK-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_x_negone: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> + %m = fmul <4 x float> %y, %s + ret <4 x float> %m +} + +; +; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y)) +; + +define float @test_f32_interp(float %x, float %y, float %t) { +; CHECK-LABEL: test_f32_interp: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_f32_interp: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1 +; CHECK_FMA4-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %t1 = fsub float 1.0, %t + %tx = fmul float %x, %t + %ty = fmul float %y, %t1 + %r = fadd float %tx, %ty + ret float %r +} + +define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) { +; CHECK-LABEL: test_v4f32_interp: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_interp: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1 +; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t + %tx = fmul <4 x float> %x, %t + %ty = fmul <4 x float> %y, %t1 + %r = fadd <4 x float> %tx, %ty + ret <4 x float> %r +} + +define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) { +; 
CHECK-LABEL: test_v8f32_interp: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1 +; CHECK-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v8f32_interp: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1 +; CHECK_FMA4-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0 +; CHECK_FMA4-NEXT: retq + %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t + %tx = fmul <8 x float> %x, %t + %ty = fmul <8 x float> %y, %t1 + %r = fadd <8 x float> %tx, %ty + ret <8 x float> %r +} + +define double @test_f64_interp(double %x, double %y, double %t) { +; CHECK-LABEL: test_f64_interp: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_f64_interp: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1 +; CHECK_FMA4-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %t1 = fsub double 1.0, %t + %tx = fmul double %x, %t + %ty = fmul double %y, %t1 + %r = fadd double %tx, %ty + ret double %r +} + +define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) { +; CHECK-LABEL: test_v2f64_interp: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v2f64_interp: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1 +; CHECK_FMA4-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t + %tx = fmul <2 x double> %x, %t + %ty = fmul <2 x double> %y, %t1 + %r = fadd <2 x double> %tx, %ty + ret <2 x double> %r +} + +define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) { +; CHECK-LABEL: test_v4f64_interp: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213pd 
%ymm1, %ymm2, %ymm1 +; CHECK-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f64_interp: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1 +; CHECK_FMA4-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0 +; CHECK_FMA4-NEXT: retq + %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t + %tx = fmul <4 x double> %x, %t + %ty = fmul <4 x double> %y, %t1 + %r = fadd <4 x double> %tx, %ty + ret <4 x double> %r +} |

