diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-09-13 11:12:56 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-09-13 11:12:56 +0000 |
| commit | 322fc53725e8b19a9ed4a19bd1c05abdc1b9d13d (patch) | |
| tree | ae3486c85c31761fe9ee31383a80836729289a7b /llvm | |
| parent | ed94bd9223c14d101eb54c45b02481c1fb00c91d (diff) | |
| download | bcm5719-llvm-322fc53725e8b19a9ed4a19bd1c05abdc1b9d13d.tar.gz bcm5719-llvm-322fc53725e8b19a9ed4a19bd1c05abdc1b9d13d.zip | |
[X86][FMA] Added *213 fma instructions to scheduling tests
Annoyingly, the 132/231 variants are pretty tricky to generate on demand because of the weak FMA commutation patterns.
llvm-svn: 313142
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/test/CodeGen/X86/fma-schedule.ll | 1269 |
1 files changed, 1269 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/fma-schedule.ll b/llvm/test/CodeGen/X86/fma-schedule.ll new file mode 100644 index 00000000000..f482659e63e --- /dev/null +++ b/llvm/test/CodeGen/X86/fma-schedule.ll @@ -0,0 +1,1269 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 + +; +; VFMADD132 (TODO) +; + +; +; VFMADD213 +; + +define <2 x double> @test_vfmadd213pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) { +; GENERIC-LABEL: test_vfmadd213pd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmadd213pd: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmadd213pd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmadd213pd: +; KNL: # BB#0: +; KNL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; 
KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmadd213pd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + %2 = load <2 x double>, <2 x double> *%a3 + %3 = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %1, <2 x double> %a1, <2 x double> %2) + ret <2 x double> %3 +} + +define <4 x double> @test_vfmadd213pd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) { +; GENERIC-LABEL: test_vfmadd213pd_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 +; GENERIC-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmadd213pd_ymm: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmadd213pd_ymm: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmadd213pd_ymm: +; KNL: # BB#0: +; KNL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmadd213pd_ymm: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 +; ZNVER1-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) + %2 = load <4 x double>, <4 x double> *%a3 + %3 = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %1, <4 x double> %a1, <4 x double> %2) + ret <4 x double> %3 +} + +define 
<4 x float> @test_vfmadd213ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) { +; GENERIC-LABEL: test_vfmadd213ps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmadd213ps: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmadd213ps: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmadd213ps: +; KNL: # BB#0: +; KNL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmadd213ps: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + %2 = load <4 x float>, <4 x float> *%a3 + %3 = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %1, <4 x float> %a1, <4 x float> %2) + ret <4 x float> %3 +} + +define <8 x float> @test_vfmadd213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) { +; GENERIC-LABEL: test_vfmadd213ps_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 +; GENERIC-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmadd213ps_ymm: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; 
SKYLAKE-LABEL: test_vfmadd213ps_ymm: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmadd213ps_ymm: +; KNL: # BB#0: +; KNL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmadd213ps_ymm: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 +; ZNVER1-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) + %2 = load <8 x float>, <8 x float> *%a3 + %3 = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %1, <8 x float> %a1, <8 x float> %2) + ret <8 x float> %3 +} + +define <2 x double> @test_vfmadd213sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) { +; GENERIC-LABEL: test_vfmadd213sd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmadd213sd: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmadd213sd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmadd213sd: +; KNL: # BB#0: +; KNL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmadd213sd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmadd213sd 
%xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + %2 = load <2 x double>, <2 x double> *%a3 + %3 = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %1, <2 x double> %a1, <2 x double> %2) + ret <2 x double> %3 +} + +define <4 x float> @test_vfmadd213ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) { +; GENERIC-LABEL: test_vfmadd213ss: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmadd213ss: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmadd213ss: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmadd213ss: +; KNL: # BB#0: +; KNL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmadd213ss: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + %2 = load <4 x float>, <4 x float> *%a3 + %3 = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %1, <4 x float> %a1, <4 x float> %2) + ret <4 x float> %3 +} + +; +; VFMADD231 (TODO) +; + +; +; VFMADDSUB132 (TODO) +; + +; +; VFMADDSUB213 +; + +define <2 x double> @test_vfmaddsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> 
%a2, <2 x double> *%a3) { +; GENERIC-LABEL: test_vfmaddsubpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmaddsubpd: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmaddsubpd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmaddsubpd: +; KNL: # BB#0: +; KNL-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmaddsubpd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + %2 = load <2 x double>, <2 x double> *%a3 + %3 = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %1, <2 x double> %a1, <2 x double> %2) + ret <2 x double> %3 +} + +define <4 x double> @test_vfmaddsubpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %a4, <4 x double> *%a3) { +; GENERIC-LABEL: test_vfmaddsubpd_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 +; GENERIC-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmaddsubpd_ymm: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: 
test_vfmaddsubpd_ymm: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmaddsubpd_ymm: +; KNL: # BB#0: +; KNL-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmaddsubpd_ymm: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 +; ZNVER1-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a4) + %2 = load <4 x double>, <4 x double> *%a3 + %3 = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %1, <4 x double> %a1, <4 x double> %2) + ret <4 x double> %3 +} + +define <4 x float> @test_vfmaddsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a4, <4 x float> *%a3) { +; GENERIC-LABEL: test_vfmaddsubps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmaddsubps: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmaddsubps: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmaddsubps: +; KNL: # BB#0: +; KNL-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmaddsubps: +; ZNVER1: # 
BB#0: +; ZNVER1-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a4) + %2 = load <4 x float>, <4 x float> *%a3 + %3 = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %1, <4 x float> %a1, <4 x float> %2) + ret <4 x float> %3 +} + +define <8 x float> @test_vfmaddsubps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %a8, <8 x float> *%a3) { +; GENERIC-LABEL: test_vfmaddsubps_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 +; GENERIC-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmaddsubps_ymm: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmaddsubps_ymm: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmaddsubps_ymm: +; KNL: # BB#0: +; KNL-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmaddsubps_ymm: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 +; ZNVER1-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a8) + %2 = load <8 x float>, <8 x float> *%a3 + %3 = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %1, <8 x float> %a1, <8 x float> %2) + ret <8 x float> %3 +} + +; +; VFMADDSUB231 (TODO) +; + +; +; VFMSUBADD132 (TODO) +; + +; +; 
VFMSUBADD213 +; + +define <2 x double> @test_vfmsubaddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) { +; GENERIC-LABEL: test_vfmsubaddpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmsubaddpd: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmsubaddpd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmsubaddpd: +; KNL: # BB#0: +; KNL-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmsubaddpd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + %2 = load <2 x double>, <2 x double> *%a3 + %3 = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %1, <2 x double> %a1, <2 x double> %2) + ret <2 x double> %3 +} + +define <4 x double> @test_vfmsubaddpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %a4, <4 x double> *%a3) { +; GENERIC-LABEL: test_vfmsubaddpd_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 +; GENERIC-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmsubaddpd_ymm: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: 
vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmsubaddpd_ymm: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmsubaddpd_ymm: +; KNL: # BB#0: +; KNL-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmsubaddpd_ymm: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 +; ZNVER1-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a4) + %2 = load <4 x double>, <4 x double> *%a3 + %3 = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %1, <4 x double> %a1, <4 x double> %2) + ret <4 x double> %3 +} + +define <4 x float> @test_vfmsubaddps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a4, <4 x float> *%a3) { +; GENERIC-LABEL: test_vfmsubaddps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmsubaddps: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmsubaddps: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmsubaddps: +; KNL: # BB#0: +; KNL-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmsubadd213ps (%rdi), 
%xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmsubaddps: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a4) + %2 = load <4 x float>, <4 x float> *%a3 + %3 = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %1, <4 x float> %a1, <4 x float> %2) + ret <4 x float> %3 +} + +define <8 x float> @test_vfmsubaddps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %a8, <8 x float> *%a3) { +; GENERIC-LABEL: test_vfmsubaddps_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 +; GENERIC-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmsubaddps_ymm: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmsubaddps_ymm: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmsubaddps_ymm: +; KNL: # BB#0: +; KNL-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmsubaddps_ymm: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 +; ZNVER1-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a8) + %2 = load <8 x float>, <8 x float> *%a3 + %3 = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %1, <8 x float> 
%a1, <8 x float> %2) + ret <8 x float> %3 +} + +; +; VFMSUBADD231 (TODO) +; + +; +; VFMSUB132 (TODO) +; + +; +; VFMSUB213 +; + +define <2 x double> @test_vfmsub213pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) { +; GENERIC-LABEL: test_vfmsub213pd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmsub213pd: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmsub213pd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmsub213pd: +; KNL: # BB#0: +; KNL-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmsub213pd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + %2 = load <2 x double>, <2 x double> *%a3 + %3 = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %1, <2 x double> %a1, <2 x double> %2) + ret <2 x double> %3 +} + +define <4 x double> @test_vfmsub213pd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) { +; GENERIC-LABEL: test_vfmsub213pd_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 +; GENERIC-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmsub213pd_ymm: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmsub213pd 
%ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmsub213pd_ymm: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmsub213pd_ymm: +; KNL: # BB#0: +; KNL-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmsub213pd_ymm: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 +; ZNVER1-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) + %2 = load <4 x double>, <4 x double> *%a3 + %3 = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %1, <4 x double> %a1, <4 x double> %2) + ret <4 x double> %3 +} + +define <4 x float> @test_vfmsub213ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) { +; GENERIC-LABEL: test_vfmsub213ps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmsub213ps: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmsub213ps: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmsub213ps: +; KNL: # BB#0: +; KNL-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmsub213ps 
(%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmsub213ps: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + %2 = load <4 x float>, <4 x float> *%a3 + %3 = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %1, <4 x float> %a1, <4 x float> %2) + ret <4 x float> %3 +} + +define <8 x float> @test_vfmsub213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) { +; GENERIC-LABEL: test_vfmsub213ps_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 +; GENERIC-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmsub213ps_ymm: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmsub213ps_ymm: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmsub213ps_ymm: +; KNL: # BB#0: +; KNL-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmsub213ps_ymm: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 +; ZNVER1-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) + %2 = load <8 x float>, <8 x float> *%a3 + %3 = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %1, <8 x float> %a1, <8 x float> %2) + ret <8 x float> 
%3 +} + +define <2 x double> @test_vfmsub213sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) { +; GENERIC-LABEL: test_vfmsub213sd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmsub213sd: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfmsub213sd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmsub213sd: +; KNL: # BB#0: +; KNL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmsub213sd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + %2 = load <2 x double>, <2 x double> *%a3 + %3 = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %1, <2 x double> %a1, <2 x double> %2) + ret <2 x double> %3 +} + +define <4 x float> @test_vfmsub213ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) { +; GENERIC-LABEL: test_vfmsub213ss: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfmsub213ss: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: 
[2:1.00] +; +; SKYLAKE-LABEL: test_vfmsub213ss: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfmsub213ss: +; KNL: # BB#0: +; KNL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfmsub213ss: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + %2 = load <4 x float>, <4 x float> *%a3 + %3 = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %1, <4 x float> %a1, <4 x float> %2) + ret <4 x float> %3 +} + +; +; VFMSUB231 (TODO) +; + +; +; VFNMADD132 (TODO) +; + +; +; VFNMADD213 +; + +define <2 x double> @test_vfnmadd213pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) { +; GENERIC-LABEL: test_vfnmadd213pd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfnmadd213pd: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfnmadd213pd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfnmadd213pd: +; KNL: # BB#0: +; KNL-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; 
+; ZNVER1-LABEL: test_vfnmadd213pd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + %2 = load <2 x double>, <2 x double> *%a3 + %3 = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %1, <2 x double> %a1, <2 x double> %2) + ret <2 x double> %3 +} + +define <4 x double> @test_vfnmadd213pd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) { +; GENERIC-LABEL: test_vfnmadd213pd_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 +; GENERIC-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfnmadd213pd_ymm: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfnmadd213pd_ymm: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfnmadd213pd_ymm: +; KNL: # BB#0: +; KNL-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfnmadd213pd_ymm: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 +; ZNVER1-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) + %2 = load <4 x double>, <4 x double> *%a3 + %3 = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %1, <4 x double> %a1, <4 x double> %2) + ret <4 x double> %3 +} + +define <4 x float> 
@test_vfnmadd213ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) { +; GENERIC-LABEL: test_vfnmadd213ps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfnmadd213ps: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfnmadd213ps: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfnmadd213ps: +; KNL: # BB#0: +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfnmadd213ps: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + %2 = load <4 x float>, <4 x float> *%a3 + %3 = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %1, <4 x float> %a1, <4 x float> %2) + ret <4 x float> %3 +} + +define <8 x float> @test_vfnmadd213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) { +; GENERIC-LABEL: test_vfnmadd213ps_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 +; GENERIC-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfnmadd213ps_ymm: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] 
+; +; SKYLAKE-LABEL: test_vfnmadd213ps_ymm: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfnmadd213ps_ymm: +; KNL: # BB#0: +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfnmadd213ps_ymm: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 +; ZNVER1-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) + %2 = load <8 x float>, <8 x float> *%a3 + %3 = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %1, <8 x float> %a1, <8 x float> %2) + ret <8 x float> %3 +} + +define <2 x double> @test_vfnmadd213sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) { +; GENERIC-LABEL: test_vfnmadd213sd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfnmadd213sd: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfnmadd213sd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfnmadd213sd: +; KNL: # BB#0: +; KNL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfnmadd213sd: +; ZNVER1: # BB#0: 
+; ZNVER1-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + %2 = load <2 x double>, <2 x double> *%a3 + %3 = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %1, <2 x double> %a1, <2 x double> %2) + ret <2 x double> %3 +} + +define <4 x float> @test_vfnmadd213ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) { +; GENERIC-LABEL: test_vfnmadd213ss: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfnmadd213ss: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfnmadd213ss: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfnmadd213ss: +; KNL: # BB#0: +; KNL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfnmadd213ss: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + %2 = load <4 x float>, <4 x float> *%a3 + %3 = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %1, <4 x float> %a1, <4 x float> %2) + ret <4 x float> %3 +} + +; +; VFNMADD231 (TODO) +; + +; +; VFNMSUB132 (TODO) +; + +; +; VFNMSUB213 +; + +define <2 x double> @test_vfnmsub213pd(<2 
x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) { +; GENERIC-LABEL: test_vfnmsub213pd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfnmsub213pd: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfnmsub213pd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfnmsub213pd: +; KNL: # BB#0: +; KNL-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfnmsub213pd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + %2 = load <2 x double>, <2 x double> *%a3 + %3 = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %1, <2 x double> %a1, <2 x double> %2) + ret <2 x double> %3 +} + +define <4 x double> @test_vfnmsub213pd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) { +; GENERIC-LABEL: test_vfnmsub213pd_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 +; GENERIC-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfnmsub213pd_ymm: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; 
+; SKYLAKE-LABEL: test_vfnmsub213pd_ymm: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfnmsub213pd_ymm: +; KNL: # BB#0: +; KNL-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfnmsub213pd_ymm: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 +; ZNVER1-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) + %2 = load <4 x double>, <4 x double> *%a3 + %3 = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %1, <4 x double> %a1, <4 x double> %2) + ret <4 x double> %3 +} + +define <4 x float> @test_vfnmsub213ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) { +; GENERIC-LABEL: test_vfnmsub213ps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfnmsub213ps: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfnmsub213ps: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfnmsub213ps: +; KNL: # BB#0: +; KNL-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfnmsub213ps: +; ZNVER1: # 
BB#0: +; ZNVER1-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + %2 = load <4 x float>, <4 x float> *%a3 + %3 = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %1, <4 x float> %a1, <4 x float> %2) + ret <4 x float> %3 +} + +define <8 x float> @test_vfnmsub213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) { +; GENERIC-LABEL: test_vfnmsub213ps_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 +; GENERIC-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfnmsub213ps_ymm: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfnmsub213ps_ymm: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfnmsub213ps_ymm: +; KNL: # BB#0: +; KNL-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfnmsub213ps_ymm: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 +; ZNVER1-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) + %2 = load <8 x float>, <8 x float> *%a3 + %3 = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %1, <8 x float> %a1, <8 x float> %2) + ret <8 x float> %3 +} + +define <2 x double> @test_vfnmsub213sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 
x double> *%a3) { +; GENERIC-LABEL: test_vfnmsub213sd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfnmsub213sd: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfnmsub213sd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfnmsub213sd: +; KNL: # BB#0: +; KNL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfnmsub213sd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + %2 = load <2 x double>, <2 x double> *%a3 + %3 = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %1, <2 x double> %a1, <2 x double> %2) + ret <2 x double> %3 +} + +define <4 x float> @test_vfnmsub213ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) { +; GENERIC-LABEL: test_vfnmsub213ss: +; GENERIC: # BB#0: +; GENERIC-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 +; GENERIC-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_vfnmsub213ss: +; HASWELL: # BB#0: +; HASWELL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_vfnmsub213ss: +; SKYLAKE: # BB#0: +; 
SKYLAKE-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [2:1.00] +; +; KNL-LABEL: test_vfnmsub213ss: +; KNL: # BB#0: +; KNL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] +; +; ZNVER1-LABEL: test_vfnmsub213ss: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 +; ZNVER1-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + %2 = load <4 x float>, <4 x float> *%a3 + %3 = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %1, <4 x float> %a1, <4 x float> %2) + ret <4 x float> %3 +} + +; +; VFNMSUB231 (TODO) +; + + +declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) +declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) +declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) +declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) +declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) +declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) + +declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>) +declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>) +declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>) +declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>) + +declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>) +declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>) +declare <2 x double> 
@llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>) +declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>) + +declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) +declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) +declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) +declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) +declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) +declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) + +declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) +declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) +declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) +declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) +declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) +declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) + +declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) +declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) +declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) +declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) +declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) +declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) |

