diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-09-21 20:32:48 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-09-21 20:32:48 +0000 |
| commit | 4003ed2da300c1c547c8ca96c211a2f8b0d24299 (patch) | |
| tree | db4d5c708e973a86a853afc831f3b1bbacd9b628 /llvm/test | |
| parent | 7649cd4389723896bc56c0fc17d04c7b2d05acb3 (diff) | |
| download | bcm5719-llvm-4003ed2da300c1c547c8ca96c211a2f8b0d24299.tar.gz bcm5719-llvm-4003ed2da300c1c547c8ca96c211a2f8b0d24299.zip | |
[DAGCombiner] Improve FMA support for interpolation patterns
This patch adds support for combining patterns such as (FMUL(FADD(1.0, x), y)) and (FMUL(FSUB(x, 1.0), y)) to their FMA equivalents.
This is particularly useful for linear interpolation patterns such as (FADD(FMUL(x, t), FMUL(y, FSUB(1.0, t)))).
Differential Revision: http://reviews.llvm.org/D13003
llvm-svn: 248210
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fma-combine.ll | 200 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fma_patterns.ll | 305 |
3 files changed, 506 insertions, 1 deletion
diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll index bd574b87711..6f3437048ed 100644 --- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll @@ -364,5 +364,205 @@ define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias % ret void } +; +; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y) +; + +; FUNC-LABEL: {{^}}test_f32_mul_add_x_one_y: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY:v[0-9]]], [[VX:v[0-9]]] +define void @test_f32_mul_add_x_one_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, 1.0 + %m = fmul float %a, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_one: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY:v[0-9]]], [[VX:v[0-9]]] +define void @test_f32_mul_y_add_x_one(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, 1.0 + %m = fmul float %y, %a + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_add_x_negone_y: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, -1.0 + %m = fmul float %a, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_negone: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float 
addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, -1.0 + %m = fmul float %y, %a + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_one_x_y: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]] +define void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float 1.0, %x + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_one_x: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]] +define void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float 1.0, %x + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_negone_x_y: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float -1.0, %x + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_negone_x: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float -1.0, %x + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_x_one_y: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], 
[[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, 1.0 + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_one: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, 1.0 + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_x_negone_y: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]] +define void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, -1.0 + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_negone: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]] +define void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, -1.0 + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; +; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y)) +; + +; FUNC-LABEL: {{^}}test_f32_interp: +; SI: v_mad_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]] +; SI: v_mac_f32_e32 [[VR]], [[VT]], [[VX:v[0-9]]] +define void @test_f32_interp(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2, + float addrspace(1)* 
%in3) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %t = load float, float addrspace(1)* %in3 + %t1 = fsub float 1.0, %t + %tx = fmul float %x, %t + %ty = fmul float %y, %t1 + %r = fadd float %tx, %ty + store float %r, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f64_interp: +; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VY]] +; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VR]] +define void @test_f64_interp(double addrspace(1)* %out, + double addrspace(1)* %in1, + double addrspace(1)* %in2, + double addrspace(1)* %in3) { + %x = load double, double addrspace(1)* %in1 + %y = load double, double addrspace(1)* %in2 + %t = load double, double addrspace(1)* %in3 + %t1 = fsub double 1.0, %t + %tx = fmul double %x, %t + %ty = fmul double %y, %t1 + %r = fadd double %tx, %ty + store double %r, double addrspace(1)* %out + ret void +} + attributes #0 = { nounwind readnone } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll index a64dd0ebd2d..0c3e4ecaa1a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll @@ -4,7 +4,7 @@ declare float @llvm.AMDGPU.lrp(float, float, float) nounwind readnone ; FUNC-LABEL: {{^}}test_lrp: -; SI: v_sub_f32 +; SI: v_mad_f32 ; SI: v_mac_f32_e32 define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind { %mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll index 4aab2118b76..0696f7c3533 100644 --- a/llvm/test/CodeGen/X86/fma_patterns.ll +++ b/llvm/test/CodeGen/X86/fma_patterns.ll @@ -2,6 +2,10 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s ; 
RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=CHECK_FMA4 +; +; Patterns (+ fneg variants): add(mul(x,y),z), sub(mul(x,y),z) +; + define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; CHECK-LABEL: test_x86_fmadd_ps: ; CHECK: # BB#0: @@ -264,3 +268,304 @@ define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 ret <4 x float> %res } +; +; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y) +; + +define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_add_x_one_y: +; CHECK: # BB#0: +; CHECK-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_add_x_one_y: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> + %m = fmul <4 x float> %a, %y + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_y_add_x_one: +; CHECK: # BB#0: +; CHECK-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_y_add_x_one: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> + %m = fmul <4 x float> %y, %a + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_add_x_negone_y: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_add_x_negone_y: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float 
-1.0> + %m = fmul <4 x float> %a, %y + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_y_add_x_negone: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_y_add_x_negone: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> + %m = fmul <4 x float> %y, %a + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_sub_one_x_y: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_sub_one_x_y: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x + %m = fmul <4 x float> %s, %y + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_y_sub_one_x: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_one_x: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x + %m = fmul <4 x float> %y, %s + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_sub_negone_x_y: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_sub_negone_x_y: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> <float -1.0, float -1.0, 
float -1.0, float -1.0>, %x + %m = fmul <4 x float> %s, %y + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_y_sub_negone_x: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_negone_x: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x + %m = fmul <4 x float> %y, %s + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_sub_x_one_y: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_sub_x_one_y: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> + %m = fmul <4 x float> %s, %y + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_y_sub_x_one: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_x_one: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> + %m = fmul <4 x float> %y, %s + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_sub_x_negone_y: +; CHECK: # BB#0: +; CHECK-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_sub_x_negone_y: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> %x, 
<float -1.0, float -1.0, float -1.0, float -1.0> + %m = fmul <4 x float> %s, %y + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: test_v4f32_mul_y_sub_x_negone: +; CHECK: # BB#0: +; CHECK-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_x_negone: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> + %m = fmul <4 x float> %y, %s + ret <4 x float> %m +} + +; +; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y)) +; + +define float @test_f32_interp(float %x, float %y, float %t) { +; CHECK-LABEL: test_f32_interp: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_f32_interp: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1 +; CHECK_FMA4-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %t1 = fsub float 1.0, %t + %tx = fmul float %x, %t + %ty = fmul float %y, %t1 + %r = fadd float %tx, %ty + ret float %r +} + +define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) { +; CHECK-LABEL: test_v4f32_interp: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f32_interp: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1 +; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t + %tx = fmul <4 x float> %x, %t + %ty = fmul <4 x float> %y, %t1 + %r = fadd <4 x float> %tx, %ty + ret <4 x float> %r +} + +define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) { +; 
CHECK-LABEL: test_v8f32_interp: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1 +; CHECK-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v8f32_interp: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1 +; CHECK_FMA4-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0 +; CHECK_FMA4-NEXT: retq + %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t + %tx = fmul <8 x float> %x, %t + %ty = fmul <8 x float> %y, %t1 + %r = fadd <8 x float> %tx, %ty + ret <8 x float> %r +} + +define double @test_f64_interp(double %x, double %y, double %t) { +; CHECK-LABEL: test_f64_interp: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_f64_interp: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1 +; CHECK_FMA4-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %t1 = fsub double 1.0, %t + %tx = fmul double %x, %t + %ty = fmul double %y, %t1 + %r = fadd double %tx, %ty + ret double %r +} + +define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) { +; CHECK-LABEL: test_v2f64_interp: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v2f64_interp: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1 +; CHECK_FMA4-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0 +; CHECK_FMA4-NEXT: retq + %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t + %tx = fmul <2 x double> %x, %t + %ty = fmul <2 x double> %y, %t1 + %r = fadd <2 x double> %tx, %ty + ret <2 x double> %r +} + +define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) { +; CHECK-LABEL: test_v4f64_interp: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmadd213pd 
%ymm1, %ymm2, %ymm1 +; CHECK-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: retq +; +; CHECK_FMA4-LABEL: test_v4f64_interp: +; CHECK_FMA4: # BB#0: +; CHECK_FMA4-NEXT: vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1 +; CHECK_FMA4-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0 +; CHECK_FMA4-NEXT: retq + %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t + %tx = fmul <4 x double> %x, %t + %ty = fmul <4 x double> %y, %t1 + %r = fadd <4 x double> %tx, %ty + ret <4 x double> %r +} |

