diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-11-25 18:32:43 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-11-25 18:32:43 +0000 |
| commit | e485631cd148e18701254634a3003e6bb5797eb2 (patch) | |
| tree | 6e7e208068025363a98e57a88f3a7ede7f48069e /llvm/test | |
| parent | ea37e201ec2f9c3d8b2c9bb37ff48cacdd992f55 (diff) | |
| download | bcm5719-llvm-e485631cd148e18701254634a3003e6bb5797eb2.tar.gz bcm5719-llvm-e485631cd148e18701254634a3003e6bb5797eb2.zip | |
[X86] Add separate intrinsics for scalar FMA4 instructions.
Summary:
These instructions zero the non-scalar part of the lower 128-bits, which makes them different from the FMA3 instructions, which pass through the non-scalar part of the lower 128-bits.
I've only added fmadd because we should be able to derive all other variants using operand negation in the intrinsic header like we do for AVX512.
I think there are still some missed negate folding opportunities with the FMA4 instructions in light of this behavior difference that I hadn't noticed before.
I've split the tests so that we can use different intrinsics for scalar testing between the two. I just copied the tests, split the RUN lines, and changed out the scalar intrinsics.
fma4-fneg-combine.ll is a new test to make sure we negate the fma4 intrinsics correctly, though there are a couple of TODOs in it.
Reviewers: RKSimon, spatel
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D39851
llvm-svn: 318984
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/fma-commute-x86.ll | 441 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fma-intrinsics-x86.ll | 201 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fma-scalar-memfold.ll | 113 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fma4-commute-x86.ll | 563 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fma4-fneg-combine.ll | 111 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll | 289 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll | 28 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fma4-scalar-memfold.ll | 104 |
8 files changed, 1081 insertions, 769 deletions
diff --git a/llvm/test/CodeGen/X86/fma-commute-x86.ll b/llvm/test/CodeGen/X86/fma-commute-x86.ll index 3ddbf261cf0..bf8b9aaae13 100644 --- a/llvm/test/CodeGen/X86/fma-commute-x86.ll +++ b/llvm/test/CodeGen/X86/fma-commute-x86.ll @@ -2,7 +2,6 @@ ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s --check-prefix=FMA ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma | FileCheck %s --check-prefix=FMA ; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s --check-prefix=FMA -; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 | FileCheck %s --check-prefix=FMA4 attributes #0 = { nounwind } @@ -14,13 +13,6 @@ define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %xmm0 ; FMA-NEXT: vfmadd213ss %xmm1, %xmm1, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_baa_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vmovaps (%rdx), %xmm1 -; FMA4-NEXT: vfmaddss %xmm0, %xmm0, %xmm1, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind ret <4 x float> %res } @@ -31,12 +23,6 @@ define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %xmm0 ; FMA-NEXT: vfmadd132ss (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_aba_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vfmaddss %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -47,12 +33,6 @@ define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %xmm0 ; FMA-NEXT: vfmadd213ss (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_bba_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rdx), %xmm0 -; FMA4-NEXT: vfmaddss (%rcx), %xmm0, %xmm0, %xmm0 -; 
FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -64,12 +44,6 @@ define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %xmm0 ; FMA-NEXT: vfmadd132ps (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_baa_ps: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vfmaddps %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind ret <4 x float> %res } @@ -80,12 +54,6 @@ define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %xmm0 ; FMA-NEXT: vfmadd231ps (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_aba_ps: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vfmaddps %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -96,12 +64,6 @@ define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %xmm0 ; FMA-NEXT: vfmadd213ps (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_bba_ps: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rdx), %xmm0 -; FMA4-NEXT: vfmaddps (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -113,12 +75,6 @@ define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %ymm0 ; FMA-NEXT: vfmadd132ps (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_baa_ps_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %ymm0 -; FMA4-NEXT: vfmaddps %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <8 x float> 
@llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind ret <8 x float> %res } @@ -129,12 +85,6 @@ define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %ymm0 ; FMA-NEXT: vfmadd231ps (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_aba_ps_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %ymm0 -; FMA4-NEXT: vfmaddps %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind ret <8 x float> %res } @@ -145,12 +95,6 @@ define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %ymm0 ; FMA-NEXT: vfmadd213ps (%rcx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_bba_ps_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rdx), %ymm0 -; FMA4-NEXT: vfmaddps (%rcx), %ymm0, %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind ret <8 x float> %res } @@ -163,13 +107,6 @@ define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rdx), %xmm0 ; FMA-NEXT: vfmadd213sd %xmm1, %xmm1, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_baa_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vmovapd (%rdx), %xmm1 -; FMA4-NEXT: vfmaddsd %xmm0, %xmm0, %xmm1, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind ret <2 x double> %res } @@ -180,12 +117,6 @@ define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rcx), %xmm0 ; FMA-NEXT: vfmadd132sd (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_aba_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call 
<2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -196,12 +127,6 @@ define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rdx), %xmm0 ; FMA-NEXT: vfmadd213sd (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_bba_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rdx), %xmm0 -; FMA4-NEXT: vfmaddsd (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -213,12 +138,6 @@ define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rcx), %xmm0 ; FMA-NEXT: vfmadd132pd (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_baa_pd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind ret <2 x double> %res } @@ -229,12 +148,6 @@ define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rcx), %xmm0 ; FMA-NEXT: vfmadd231pd (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_aba_pd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -245,12 +158,6 @@ define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rdx), %xmm0 ; FMA-NEXT: vfmadd213pd (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_bba_pd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rdx), %xmm0 -; FMA4-NEXT: vfmaddpd (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> 
@llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -262,12 +169,6 @@ define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) # ; FMA-NEXT: vmovapd (%rcx), %ymm0 ; FMA-NEXT: vfmadd132pd (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_baa_pd_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %ymm0 -; FMA4-NEXT: vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind ret <4 x double> %res } @@ -278,12 +179,6 @@ define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) # ; FMA-NEXT: vmovapd (%rcx), %ymm0 ; FMA-NEXT: vfmadd231pd (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_aba_pd_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %ymm0 -; FMA4-NEXT: vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind ret <4 x double> %res } @@ -294,12 +189,6 @@ define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) # ; FMA-NEXT: vmovapd (%rdx), %ymm0 ; FMA-NEXT: vfmadd213pd (%rcx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmadd_bba_pd_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rdx), %ymm0 -; FMA4-NEXT: vfmaddpd (%rcx), %ymm0, %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind ret <4 x double> %res } @@ -313,13 +202,6 @@ define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %xmm0 ; FMA-NEXT: vfnmadd213ss %xmm1, %xmm1, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_baa_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vmovaps (%rdx), %xmm1 -; FMA4-NEXT: vfnmaddss %xmm0, %xmm0, %xmm1, %xmm0 -; 
FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind ret <4 x float> %res } @@ -330,12 +212,6 @@ define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %xmm0 ; FMA-NEXT: vfnmadd132ss (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_aba_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vfnmaddss %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -346,12 +222,6 @@ define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %xmm0 ; FMA-NEXT: vfnmadd213ss (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_bba_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rdx), %xmm0 -; FMA4-NEXT: vfnmaddss (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -363,12 +233,6 @@ define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %xmm0 ; FMA-NEXT: vfnmadd132ps (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_baa_ps: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind ret <4 x float> %res } @@ -379,12 +243,6 @@ define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %xmm0 ; FMA-NEXT: vfnmadd231ps (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_aba_ps: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call 
<4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -395,12 +253,6 @@ define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %xmm0 ; FMA-NEXT: vfnmadd213ps (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_bba_ps: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rdx), %xmm0 -; FMA4-NEXT: vfnmaddps (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -412,12 +264,6 @@ define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 ; FMA-NEXT: vmovaps (%rcx), %ymm0 ; FMA-NEXT: vfnmadd132ps (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_baa_ps_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %ymm0 -; FMA4-NEXT: vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind ret <8 x float> %res } @@ -428,12 +274,6 @@ define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 ; FMA-NEXT: vmovaps (%rcx), %ymm0 ; FMA-NEXT: vfnmadd231ps (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_aba_ps_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %ymm0 -; FMA4-NEXT: vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind ret <8 x float> %res } @@ -444,12 +284,6 @@ define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 ; FMA-NEXT: vmovaps (%rdx), %ymm0 ; FMA-NEXT: vfnmadd213ps (%rcx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_bba_ps_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rdx), %ymm0 -; FMA4-NEXT: vfnmaddps (%rcx), %ymm0, %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <8 x float> 
@llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind ret <8 x float> %res } @@ -462,13 +296,6 @@ define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rdx), %xmm0 ; FMA-NEXT: vfnmadd213sd %xmm1, %xmm1, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_baa_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vmovapd (%rdx), %xmm1 -; FMA4-NEXT: vfnmaddsd %xmm0, %xmm0, %xmm1, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind ret <2 x double> %res } @@ -479,12 +306,6 @@ define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rcx), %xmm0 ; FMA-NEXT: vfnmadd132sd (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_aba_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vfnmaddsd %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -495,12 +316,6 @@ define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rdx), %xmm0 ; FMA-NEXT: vfnmadd213sd (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_bba_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rdx), %xmm0 -; FMA4-NEXT: vfnmaddsd (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -512,12 +327,6 @@ define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rcx), %xmm0 ; FMA-NEXT: vfnmadd132pd (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_baa_pd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: 
retq %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind ret <2 x double> %res } @@ -528,12 +337,6 @@ define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rcx), %xmm0 ; FMA-NEXT: vfnmadd231pd (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_aba_pd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -544,12 +347,6 @@ define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rdx), %xmm0 ; FMA-NEXT: vfnmadd213pd (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_bba_pd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rdx), %xmm0 -; FMA4-NEXT: vfnmaddpd (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -561,12 +358,6 @@ define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) ; FMA-NEXT: vmovapd (%rcx), %ymm0 ; FMA-NEXT: vfnmadd132pd (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_baa_pd_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %ymm0 -; FMA4-NEXT: vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind ret <4 x double> %res } @@ -577,12 +368,6 @@ define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) ; FMA-NEXT: vmovapd (%rcx), %ymm0 ; FMA-NEXT: vfnmadd231pd (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_aba_pd_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %ymm0 -; FMA4-NEXT: vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0 -; 
FMA4-NEXT: retq %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind ret <4 x double> %res } @@ -593,12 +378,6 @@ define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) ; FMA-NEXT: vmovapd (%rdx), %ymm0 ; FMA-NEXT: vfnmadd213pd (%rcx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmadd_bba_pd_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rdx), %ymm0 -; FMA4-NEXT: vfnmaddpd (%rcx), %ymm0, %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind ret <4 x double> %res } @@ -611,13 +390,6 @@ define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %xmm0 ; FMA-NEXT: vfmsub213ss %xmm1, %xmm1, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_baa_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vmovaps (%rdx), %xmm1 -; FMA4-NEXT: vfmsubss %xmm0, %xmm0, %xmm1, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind ret <4 x float> %res } @@ -628,12 +400,6 @@ define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %xmm0 ; FMA-NEXT: vfmsub132ss (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_aba_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vfmsubss %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -644,12 +410,6 @@ define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %xmm0 ; FMA-NEXT: vfmsub213ss (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_bba_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rdx), %xmm0 -; FMA4-NEXT: vfmsubss (%rcx), %xmm0, 
%xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -661,12 +421,6 @@ define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %xmm0 ; FMA-NEXT: vfmsub132ps (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_baa_ps: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vfmsubps %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind ret <4 x float> %res } @@ -677,12 +431,6 @@ define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %xmm0 ; FMA-NEXT: vfmsub231ps (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_aba_ps: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vfmsubps %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -693,12 +441,6 @@ define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %xmm0 ; FMA-NEXT: vfmsub213ps (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_bba_ps: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rdx), %xmm0 -; FMA4-NEXT: vfmsubps (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -710,12 +452,6 @@ define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %ymm0 ; FMA-NEXT: vfmsub132ps (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_baa_ps_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %ymm0 -; FMA4-NEXT: vfmsubps %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call 
<8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind ret <8 x float> %res } @@ -726,12 +462,6 @@ define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %ymm0 ; FMA-NEXT: vfmsub231ps (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_aba_ps_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %ymm0 -; FMA4-NEXT: vfmsubps %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind ret <8 x float> %res } @@ -742,12 +472,6 @@ define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %ymm0 ; FMA-NEXT: vfmsub213ps (%rcx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_bba_ps_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rdx), %ymm0 -; FMA4-NEXT: vfmsubps (%rcx), %ymm0, %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind ret <8 x float> %res } @@ -760,13 +484,6 @@ define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rdx), %xmm0 ; FMA-NEXT: vfmsub213sd %xmm1, %xmm1, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_baa_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vmovapd (%rdx), %xmm1 -; FMA4-NEXT: vfmsubsd %xmm0, %xmm0, %xmm1, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind ret <2 x double> %res } @@ -777,12 +494,6 @@ define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rcx), %xmm0 ; FMA-NEXT: vfmsub132sd (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_aba_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vfmsubsd %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: 
retq %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -793,12 +504,6 @@ define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rdx), %xmm0 ; FMA-NEXT: vfmsub213sd (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_bba_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rdx), %xmm0 -; FMA4-NEXT: vfmsubsd (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -810,12 +515,6 @@ define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rcx), %xmm0 ; FMA-NEXT: vfmsub132pd (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_baa_pd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind ret <2 x double> %res } @@ -826,12 +525,6 @@ define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rcx), %xmm0 ; FMA-NEXT: vfmsub231pd (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_aba_pd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -842,12 +535,6 @@ define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rdx), %xmm0 ; FMA-NEXT: vfmsub213pd (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_bba_pd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rdx), %xmm0 -; FMA4-NEXT: vfmsubpd (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x 
double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -859,12 +546,6 @@ define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) # ; FMA-NEXT: vmovapd (%rcx), %ymm0 ; FMA-NEXT: vfmsub132pd (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_baa_pd_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %ymm0 -; FMA4-NEXT: vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind ret <4 x double> %res } @@ -875,12 +556,6 @@ define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) # ; FMA-NEXT: vmovapd (%rcx), %ymm0 ; FMA-NEXT: vfmsub231pd (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_aba_pd_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %ymm0 -; FMA4-NEXT: vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind ret <4 x double> %res } @@ -891,12 +566,6 @@ define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) # ; FMA-NEXT: vmovapd (%rdx), %ymm0 ; FMA-NEXT: vfmsub213pd (%rcx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fmsub_bba_pd_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rdx), %ymm0 -; FMA4-NEXT: vfmsubpd (%rcx), %ymm0, %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind ret <4 x double> %res } @@ -910,13 +579,6 @@ define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %xmm0 ; FMA-NEXT: vfnmsub213ss %xmm1, %xmm1, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_baa_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vmovaps (%rdx), %xmm1 -; FMA4-NEXT: vfnmsubss %xmm0, %xmm0, %xmm1, %xmm0 
-; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind ret <4 x float> %res } @@ -927,12 +589,6 @@ define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %xmm0 ; FMA-NEXT: vfnmsub132ss (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_aba_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vfnmsubss %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -943,12 +599,6 @@ define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %xmm0 ; FMA-NEXT: vfnmsub213ss (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_bba_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rdx), %xmm0 -; FMA4-NEXT: vfnmsubss (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -960,12 +610,6 @@ define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %xmm0 ; FMA-NEXT: vfnmsub132ps (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_baa_ps: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind ret <4 x float> %res } @@ -976,12 +620,6 @@ define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rcx), %xmm0 ; FMA-NEXT: vfnmsub231ps (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_aba_ps: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %xmm0 -; FMA4-NEXT: vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = 
call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -992,12 +630,6 @@ define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 { ; FMA-NEXT: vmovaps (%rdx), %xmm0 ; FMA-NEXT: vfnmsub213ps (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_bba_ps: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rdx), %xmm0 -; FMA4-NEXT: vfnmsubps (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind ret <4 x float> %res } @@ -1009,12 +641,6 @@ define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 ; FMA-NEXT: vmovaps (%rcx), %ymm0 ; FMA-NEXT: vfnmsub132ps (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_baa_ps_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %ymm0 -; FMA4-NEXT: vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind ret <8 x float> %res } @@ -1025,12 +651,6 @@ define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 ; FMA-NEXT: vmovaps (%rcx), %ymm0 ; FMA-NEXT: vfnmsub231ps (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_aba_ps_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rcx), %ymm0 -; FMA4-NEXT: vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind ret <8 x float> %res } @@ -1041,12 +661,6 @@ define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 ; FMA-NEXT: vmovaps (%rdx), %ymm0 ; FMA-NEXT: vfnmsub213ps (%rcx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_bba_ps_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovaps (%rdx), %ymm0 -; FMA4-NEXT: vfnmsubps (%rcx), %ymm0, %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <8 x 
float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind ret <8 x float> %res } @@ -1059,13 +673,6 @@ define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rdx), %xmm0 ; FMA-NEXT: vfnmsub213sd %xmm1, %xmm1, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_baa_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vmovapd (%rdx), %xmm1 -; FMA4-NEXT: vfnmsubsd %xmm0, %xmm0, %xmm1, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind ret <2 x double> %res } @@ -1076,12 +683,6 @@ define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rcx), %xmm0 ; FMA-NEXT: vfnmsub132sd (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_aba_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vfnmsubsd %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -1092,12 +693,6 @@ define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rdx), %xmm0 ; FMA-NEXT: vfnmsub213sd (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_bba_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rdx), %xmm0 -; FMA4-NEXT: vfnmsubsd (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -1109,12 +704,6 @@ define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rcx), %xmm0 ; FMA-NEXT: vfnmsub132pd (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_baa_pd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0 -; 
FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind ret <2 x double> %res } @@ -1125,12 +714,6 @@ define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rcx), %xmm0 ; FMA-NEXT: vfnmsub231pd (%rdx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_aba_pd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %xmm0 -; FMA4-NEXT: vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -1141,12 +724,6 @@ define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 ; FMA-NEXT: vmovapd (%rdx), %xmm0 ; FMA-NEXT: vfnmsub213pd (%rcx), %xmm0, %xmm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_bba_pd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rdx), %xmm0 -; FMA4-NEXT: vfnmsubpd (%rcx), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind ret <2 x double> %res } @@ -1158,12 +735,6 @@ define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) ; FMA-NEXT: vmovapd (%rcx), %ymm0 ; FMA-NEXT: vfnmsub132pd (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_baa_pd_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %ymm0 -; FMA4-NEXT: vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind ret <4 x double> %res } @@ -1174,12 +745,6 @@ define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) ; FMA-NEXT: vmovapd (%rcx), %ymm0 ; FMA-NEXT: vfnmsub231pd (%rdx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_aba_pd_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rcx), %ymm0 -; FMA4-NEXT: vfnmsubpd %ymm0, (%rdx), %ymm0, 
%ymm0 -; FMA4-NEXT: retq %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind ret <4 x double> %res } @@ -1190,12 +755,6 @@ define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) ; FMA-NEXT: vmovapd (%rdx), %ymm0 ; FMA-NEXT: vfnmsub213pd (%rcx), %ymm0, %ymm0 ; FMA-NEXT: retq -; -; FMA4-LABEL: test_x86_fnmsub_bba_pd_y: -; FMA4: # BB#0: -; FMA4-NEXT: vmovapd (%rdx), %ymm0 -; FMA4-NEXT: vfnmsubpd (%rcx), %ymm0, %ymm0, %ymm0 -; FMA4-NEXT: retq %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind ret <4 x double> %res } diff --git a/llvm/test/CodeGen/X86/fma-intrinsics-x86.ll b/llvm/test/CodeGen/X86/fma-intrinsics-x86.ll index 362864f72a9..6b28d0c19cf 100644 --- a/llvm/test/CodeGen/X86/fma-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/fma-intrinsics-x86.ll @@ -2,7 +2,6 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512VL ; RUN: llc < %s -mtriple=x86_64-pc-windows -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-WIN -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,-fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4 ; VFMADD define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { @@ -22,11 +21,6 @@ define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01] ; CHECK-FMA-WIN-NEXT: vfmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa9,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ss: -; 
CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res } @@ -50,11 +44,6 @@ define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa9,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_bac_ss: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6a,0xc2,0x00] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2) ret <4 x float> %res } @@ -77,11 +66,6 @@ define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01] ; CHECK-FMA-WIN-NEXT: vfmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa9,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_sd: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res } @@ -105,11 +89,6 @@ define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> % ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa9,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_bac_sd: -; CHECK-FMA4: # BB#0: -; 
CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6b,0xc2,0x00] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2) ret <2 x double> %res } @@ -132,11 +111,6 @@ define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa8,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ps: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res } @@ -159,11 +133,6 @@ define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa8,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_pd: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res } @@ -186,11 +155,6 @@ define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa8,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ps_256: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: 
vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ret <8 x float> %res } @@ -213,11 +177,6 @@ define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> % ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa8,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_pd_256: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ret <4 x double> %res } @@ -241,11 +200,6 @@ define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01] ; CHECK-FMA-WIN-NEXT: vfmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xab,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ss: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6e,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res } @@ -269,11 +223,6 @@ define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xab,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_bac_ss: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmsubss 
%xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6e,0xc2,0x00] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2) ret <4 x float> %res } @@ -296,11 +245,6 @@ define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01] ; CHECK-FMA-WIN-NEXT: vfmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xab,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_sd: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6f,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res } @@ -324,11 +268,6 @@ define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> % ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xab,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_bac_sd: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6f,0xc2,0x00] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2) ret <2 x double> %res } @@ -351,11 +290,6 @@ define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaa,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ps: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmsubps %xmm2, %xmm1, 
%xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res } @@ -378,11 +312,6 @@ define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaa,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_pd: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res } @@ -405,11 +334,6 @@ define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xaa,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ps_256: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ret <8 x float> %res } @@ -432,11 +356,6 @@ define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> % ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xaa,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_pd_256: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # 
encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ret <4 x double> %res } @@ -460,11 +379,6 @@ define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01] ; CHECK-FMA-WIN-NEXT: vfnmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xad,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ss: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7a,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res } @@ -488,11 +402,6 @@ define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfnmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xad,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_bac_ss: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x7a,0xc2,0x00] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2) ret <4 x float> %res } @@ -515,11 +424,6 @@ define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01] ; CHECK-FMA-WIN-NEXT: vfnmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xad,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_sd: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # 
encoding: [0xc4,0xe3,0xf9,0x7b,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res } @@ -543,11 +447,6 @@ define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfnmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xad,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_bac_sd: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x7b,0xc2,0x00] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2) ret <2 x double> %res } @@ -570,11 +469,6 @@ define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xac,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ps: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res } @@ -597,11 +491,6 @@ define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xac,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_pd: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # 
encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res } @@ -624,11 +513,6 @@ define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xac,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ps_256: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ret <8 x float> %res } @@ -651,11 +535,6 @@ define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xac,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_pd_256: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ret <4 x double> %res } @@ -679,11 +558,6 @@ define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01] ; CHECK-FMA-WIN-NEXT: vfnmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaf,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ss: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, 
%xmm0 # encoding: [0xc4,0xe3,0xf9,0x7e,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res } @@ -707,11 +581,6 @@ define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfnmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaf,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_bac_ss: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x7e,0xc2,0x00] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2) ret <4 x float> %res } @@ -734,11 +603,6 @@ define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01] ; CHECK-FMA-WIN-NEXT: vfnmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaf,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_sd: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7f,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res } @@ -762,11 +626,6 @@ define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfnmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaf,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_bac_sd: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm0, %xmm1, %xmm0 # 
encoding: [0xc4,0xe3,0xf1,0x7f,0xc2,0x00] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2) ret <2 x double> %res } @@ -789,11 +648,6 @@ define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xae,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ps: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res } @@ -816,11 +670,6 @@ define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xae,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_pd: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res } @@ -843,11 +692,6 @@ define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xae,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ps_256: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # 
encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ret <8 x float> %res } @@ -870,11 +714,6 @@ define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xae,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_pd_256: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ret <4 x double> %res } @@ -898,11 +737,6 @@ define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa6,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_ps: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res } @@ -925,11 +759,6 @@ define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa6,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_pd: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, 
%xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res } @@ -952,11 +781,6 @@ define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> % ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa6,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_ps_256: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ret <8 x float> %res } @@ -979,11 +803,6 @@ define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa6,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_pd_256: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ret <4 x double> %res } @@ -1007,11 +826,6 @@ define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa7,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_ps: -; CHECK-FMA4: # BB#0: -; 
CHECK-FMA4-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res } @@ -1034,11 +848,6 @@ define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa7,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_pd: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res } @@ -1061,11 +870,6 @@ define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> % ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa7,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_ps_256: -; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ret <8 x float> %res } @@ -1088,11 +892,6 @@ define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa7,0x00] ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] -; -; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_pd_256: 
-; CHECK-FMA4: # BB#0: -; CHECK-FMA4-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10] -; CHECK-FMA4-NEXT: retq # encoding: [0xc3] %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ret <4 x double> %res } diff --git a/llvm/test/CodeGen/X86/fma-scalar-memfold.ll b/llvm/test/CodeGen/X86/fma-scalar-memfold.ll index 4b400da3206..23baeafe98d 100644 --- a/llvm/test/CodeGen/X86/fma-scalar-memfold.ll +++ b/llvm/test/CodeGen/X86/fma-scalar-memfold.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 ; RUN: llc < %s -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 -; RUN: llc < %s -mattr=fma4 | FileCheck %s --check-prefix=FMA4 target triple = "x86_64-unknown-unknown" @@ -22,13 +21,6 @@ define void @fmadd_aab_ss(float* %a, float* %b) { ; CHECK-NEXT: vfmadd213ss (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fmadd_aab_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; FMA4-NEXT: vfmaddss (%rsi), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: vmovss %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load float, float* %a %av0 = insertelement <4 x float> undef, float %a.val, i32 0 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 @@ -55,13 +47,6 @@ define void @fmadd_aba_ss(float* %a, float* %b) { ; CHECK-NEXT: vfmadd132ss (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fmadd_aba_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; FMA4-NEXT: vfmaddss %xmm0, (%rsi), %xmm0, %xmm0 -; FMA4-NEXT: vmovss %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load float, float* %a %av0 = insertelement <4 x float> undef, float %a.val, i32 0 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 @@ 
-88,13 +73,6 @@ define void @fmsub_aab_ss(float* %a, float* %b) { ; CHECK-NEXT: vfmsub213ss (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fmsub_aab_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; FMA4-NEXT: vfmsubss (%rsi), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: vmovss %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load float, float* %a %av0 = insertelement <4 x float> undef, float %a.val, i32 0 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 @@ -121,13 +99,6 @@ define void @fmsub_aba_ss(float* %a, float* %b) { ; CHECK-NEXT: vfmsub132ss (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fmsub_aba_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; FMA4-NEXT: vfmsubss %xmm0, (%rsi), %xmm0, %xmm0 -; FMA4-NEXT: vmovss %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load float, float* %a %av0 = insertelement <4 x float> undef, float %a.val, i32 0 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 @@ -154,13 +125,6 @@ define void @fnmadd_aab_ss(float* %a, float* %b) { ; CHECK-NEXT: vfnmadd213ss (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fnmadd_aab_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; FMA4-NEXT: vfnmaddss (%rsi), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: vmovss %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load float, float* %a %av0 = insertelement <4 x float> undef, float %a.val, i32 0 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 @@ -187,13 +151,6 @@ define void @fnmadd_aba_ss(float* %a, float* %b) { ; CHECK-NEXT: vfnmadd132ss (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fnmadd_aba_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; FMA4-NEXT: vfnmaddss %xmm0, (%rsi), %xmm0, %xmm0 -; FMA4-NEXT: vmovss %xmm0, (%rdi) -; 
FMA4-NEXT: retq %a.val = load float, float* %a %av0 = insertelement <4 x float> undef, float %a.val, i32 0 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 @@ -220,13 +177,6 @@ define void @fnmsub_aab_ss(float* %a, float* %b) { ; CHECK-NEXT: vfnmsub213ss (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fnmsub_aab_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; FMA4-NEXT: vfnmsubss (%rsi), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: vmovss %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load float, float* %a %av0 = insertelement <4 x float> undef, float %a.val, i32 0 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 @@ -253,13 +203,6 @@ define void @fnmsub_aba_ss(float* %a, float* %b) { ; CHECK-NEXT: vfnmsub132ss (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fnmsub_aba_ss: -; FMA4: # BB#0: -; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; FMA4-NEXT: vfnmsubss %xmm0, (%rsi), %xmm0, %xmm0 -; FMA4-NEXT: vmovss %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load float, float* %a %av0 = insertelement <4 x float> undef, float %a.val, i32 0 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 @@ -286,13 +229,6 @@ define void @fmadd_aab_sd(double* %a, double* %b) { ; CHECK-NEXT: vfmadd213sd (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovlpd %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fmadd_aab_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; FMA4-NEXT: vfmaddsd (%rsi), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: vmovlpd %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 @@ -315,13 +251,6 @@ define void @fmadd_aba_sd(double* %a, double* %b) { ; CHECK-NEXT: vfmadd132sd (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovlpd %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: 
fmadd_aba_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; FMA4-NEXT: vfmaddsd %xmm0, (%rsi), %xmm0, %xmm0 -; FMA4-NEXT: vmovlpd %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 @@ -344,13 +273,6 @@ define void @fmsub_aab_sd(double* %a, double* %b) { ; CHECK-NEXT: vfmsub213sd (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovlpd %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fmsub_aab_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; FMA4-NEXT: vfmsubsd (%rsi), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: vmovlpd %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 @@ -373,13 +295,6 @@ define void @fmsub_aba_sd(double* %a, double* %b) { ; CHECK-NEXT: vfmsub132sd (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovlpd %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fmsub_aba_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; FMA4-NEXT: vfmsubsd %xmm0, (%rsi), %xmm0, %xmm0 -; FMA4-NEXT: vmovlpd %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 @@ -402,13 +317,6 @@ define void @fnmadd_aab_sd(double* %a, double* %b) { ; CHECK-NEXT: vfnmadd213sd (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovlpd %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fnmadd_aab_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; FMA4-NEXT: vfnmaddsd (%rsi), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: vmovlpd %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 @@ -431,13 +339,6 @@ define 
void @fnmadd_aba_sd(double* %a, double* %b) { ; CHECK-NEXT: vfnmadd132sd (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovlpd %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fnmadd_aba_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; FMA4-NEXT: vfnmaddsd %xmm0, (%rsi), %xmm0, %xmm0 -; FMA4-NEXT: vmovlpd %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 @@ -460,13 +361,6 @@ define void @fnmsub_aab_sd(double* %a, double* %b) { ; CHECK-NEXT: vfnmsub213sd (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovlpd %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fnmsub_aab_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; FMA4-NEXT: vfnmsubsd (%rsi), %xmm0, %xmm0, %xmm0 -; FMA4-NEXT: vmovlpd %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 @@ -489,13 +383,6 @@ define void @fnmsub_aba_sd(double* %a, double* %b) { ; CHECK-NEXT: vfnmsub132sd (%rsi), %xmm0, %xmm0 ; CHECK-NEXT: vmovlpd %xmm0, (%rdi) ; CHECK-NEXT: retq -; -; FMA4-LABEL: fnmsub_aba_sd: -; FMA4: # BB#0: -; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; FMA4-NEXT: vfnmsubsd %xmm0, (%rsi), %xmm0, %xmm0 -; FMA4-NEXT: vmovlpd %xmm0, (%rdi) -; FMA4-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 diff --git a/llvm/test/CodeGen/X86/fma4-commute-x86.ll b/llvm/test/CodeGen/X86/fma4-commute-x86.ll new file mode 100644 index 00000000000..f47eb7c75a5 --- /dev/null +++ b/llvm/test/CodeGen/X86/fma4-commute-x86.ll @@ -0,0 +1,563 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 | FileCheck %s 
--check-prefix=FMA4 --check-prefix=FMA + +attributes #0 = { nounwind } + +declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone +define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_baa_ss: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %xmm0 +; FMA4-NEXT: vfmaddss %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind + ret <4 x float> %res +} + +define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_aba_ss: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %xmm0 +; FMA4-NEXT: vfmaddss %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind + ret <4 x float> %res +} + +define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_bba_ss: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rdx), %xmm0 +; FMA4-NEXT: vfmaddss (%rcx), %xmm0, %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind + ret <4 x float> %res +} + +declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone +define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_baa_ps: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %xmm0 +; FMA4-NEXT: vfmaddps %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind + ret <4 x float> %res +} + +define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_aba_ps: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %xmm0 +; FMA4-NEXT: vfmaddps %xmm0, (%rdx), 
%xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind + ret <4 x float> %res +} + +define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_bba_ps: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rdx), %xmm0 +; FMA4-NEXT: vfmaddps (%rcx), %xmm0, %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind + ret <4 x float> %res +} + +declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone +define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_baa_ps_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %ymm0 +; FMA4-NEXT: vfmaddps %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind + ret <8 x float> %res +} + +define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_aba_ps_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %ymm0 +; FMA4-NEXT: vfmaddps %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind + ret <8 x float> %res +} + +define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_bba_ps_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rdx), %ymm0 +; FMA4-NEXT: vfmaddps (%rcx), %ymm0, %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind + ret <8 x float> %res +} + +declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone +define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 { +; 
FMA4-LABEL: test_x86_fmadd_baa_sd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %xmm0 +; FMA4-NEXT: vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind + ret <2 x double> %res +} + +define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_aba_sd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %xmm0 +; FMA4-NEXT: vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind + ret <2 x double> %res +} + +define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_bba_sd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rdx), %xmm0 +; FMA4-NEXT: vfmaddsd (%rcx), %xmm0, %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind + ret <2 x double> %res +} + +declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone +define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_baa_pd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %xmm0 +; FMA4-NEXT: vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind + ret <2 x double> %res +} + +define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_aba_pd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %xmm0 +; FMA4-NEXT: vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind + ret <2 x double> %res +} + +define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> 
%a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_bba_pd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rdx), %xmm0 +; FMA4-NEXT: vfmaddpd (%rcx), %xmm0, %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind + ret <2 x double> %res +} + +declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone +define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_baa_pd_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %ymm0 +; FMA4-NEXT: vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind + ret <4 x double> %res +} + +define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_aba_pd_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %ymm0 +; FMA4-NEXT: vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind + ret <4 x double> %res +} + +define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmadd_bba_pd_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rdx), %ymm0 +; FMA4-NEXT: vfmaddpd (%rcx), %ymm0, %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind + ret <4 x double> %res +} + +declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone +define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fnmadd_baa_ps: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %xmm0 +; FMA4-NEXT: vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> 
@llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind + ret <4 x float> %res +} + +define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fnmadd_aba_ps: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %xmm0 +; FMA4-NEXT: vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind + ret <4 x float> %res +} + +define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fnmadd_bba_ps: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rdx), %xmm0 +; FMA4-NEXT: vfnmaddps (%rcx), %xmm0, %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind + ret <4 x float> %res +} + +declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone +define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 { +; FMA4-LABEL: test_x86_fnmadd_baa_ps_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %ymm0 +; FMA4-NEXT: vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind + ret <8 x float> %res +} + +define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 { +; FMA4-LABEL: test_x86_fnmadd_aba_ps_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %ymm0 +; FMA4-NEXT: vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind + ret <8 x float> %res +} + +define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 { +; FMA4-LABEL: test_x86_fnmadd_bba_ps_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rdx), %ymm0 +; FMA4-NEXT: vfnmaddps (%rcx), %ymm0, %ymm0, %ymm0 +; FMA4-NEXT: 
retq + %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind + ret <8 x float> %res +} + +declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone +define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fnmadd_baa_pd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %xmm0 +; FMA4-NEXT: vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind + ret <2 x double> %res +} + +define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fnmadd_aba_pd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %xmm0 +; FMA4-NEXT: vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind + ret <2 x double> %res +} + +define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fnmadd_bba_pd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rdx), %xmm0 +; FMA4-NEXT: vfnmaddpd (%rcx), %xmm0, %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind + ret <2 x double> %res +} + +declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone +define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 { +; FMA4-LABEL: test_x86_fnmadd_baa_pd_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %ymm0 +; FMA4-NEXT: vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind + ret <4 x double> %res +} + +define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> 
%b) #0 { +; FMA4-LABEL: test_x86_fnmadd_aba_pd_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %ymm0 +; FMA4-NEXT: vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind + ret <4 x double> %res +} + +define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 { +; FMA4-LABEL: test_x86_fnmadd_bba_pd_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rdx), %ymm0 +; FMA4-NEXT: vfnmaddpd (%rcx), %ymm0, %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind + ret <4 x double> %res +} + +declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone +define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmsub_baa_ps: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %xmm0 +; FMA4-NEXT: vfmsubps %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind + ret <4 x float> %res +} + +define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmsub_aba_ps: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %xmm0 +; FMA4-NEXT: vfmsubps %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind + ret <4 x float> %res +} + +define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmsub_bba_ps: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rdx), %xmm0 +; FMA4-NEXT: vfmsubps (%rcx), %xmm0, %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind + ret <4 x float> %res +} + +declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x 
float>, <8 x float>, <8 x float>) nounwind readnone +define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmsub_baa_ps_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %ymm0 +; FMA4-NEXT: vfmsubps %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind + ret <8 x float> %res +} + +define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmsub_aba_ps_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %ymm0 +; FMA4-NEXT: vfmsubps %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind + ret <8 x float> %res +} + +define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 { +; FMA4-LABEL: test_x86_fmsub_bba_ps_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rdx), %ymm0 +; FMA4-NEXT: vfmsubps (%rcx), %ymm0, %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind + ret <8 x float> %res +} + +declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone +define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmsub_baa_pd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %xmm0 +; FMA4-NEXT: vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind + ret <2 x double> %res +} + +define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmsub_aba_pd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %xmm0 +; FMA4-NEXT: vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x 
double> %a, <2 x double> %b, <2 x double> %a) nounwind + ret <2 x double> %res +} + +define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmsub_bba_pd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rdx), %xmm0 +; FMA4-NEXT: vfmsubpd (%rcx), %xmm0, %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind + ret <2 x double> %res +} + +declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone +define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmsub_baa_pd_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %ymm0 +; FMA4-NEXT: vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind + ret <4 x double> %res +} + +define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmsub_aba_pd_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %ymm0 +; FMA4-NEXT: vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind + ret <4 x double> %res +} + +define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 { +; FMA4-LABEL: test_x86_fmsub_bba_pd_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rdx), %ymm0 +; FMA4-NEXT: vfmsubpd (%rcx), %ymm0, %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind + ret <4 x double> %res +} + +declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone +define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fnmsub_baa_ps: +; FMA4: # BB#0: +; 
FMA4-NEXT: vmovaps (%rcx), %xmm0 +; FMA4-NEXT: vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind + ret <4 x float> %res +} + +define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fnmsub_aba_ps: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %xmm0 +; FMA4-NEXT: vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind + ret <4 x float> %res +} + +define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 { +; FMA4-LABEL: test_x86_fnmsub_bba_ps: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rdx), %xmm0 +; FMA4-NEXT: vfnmsubps (%rcx), %xmm0, %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind + ret <4 x float> %res +} + +declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone +define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 { +; FMA4-LABEL: test_x86_fnmsub_baa_ps_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %ymm0 +; FMA4-NEXT: vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind + ret <8 x float> %res +} + +define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 { +; FMA4-LABEL: test_x86_fnmsub_aba_ps_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rcx), %ymm0 +; FMA4-NEXT: vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind + ret <8 x float> %res +} + +define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 { +; FMA4-LABEL: 
test_x86_fnmsub_bba_ps_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovaps (%rdx), %ymm0 +; FMA4-NEXT: vfnmsubps (%rcx), %ymm0, %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind + ret <8 x float> %res +} + +declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone +define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fnmsub_baa_pd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %xmm0 +; FMA4-NEXT: vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind + ret <2 x double> %res +} + +define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fnmsub_aba_pd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %xmm0 +; FMA4-NEXT: vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind + ret <2 x double> %res +} + +define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 { +; FMA4-LABEL: test_x86_fnmsub_bba_pd: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rdx), %xmm0 +; FMA4-NEXT: vfnmsubpd (%rcx), %xmm0, %xmm0, %xmm0 +; FMA4-NEXT: retq + %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind + ret <2 x double> %res +} + +declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone +define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 { +; FMA4-LABEL: test_x86_fnmsub_baa_pd_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %ymm0 +; FMA4-NEXT: vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 
x double> %a, <4 x double> %a) nounwind + ret <4 x double> %res +} + +define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 { +; FMA4-LABEL: test_x86_fnmsub_aba_pd_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rcx), %ymm0 +; FMA4-NEXT: vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind + ret <4 x double> %res +} + +define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 { +; FMA4-LABEL: test_x86_fnmsub_bba_pd_y: +; FMA4: # BB#0: +; FMA4-NEXT: vmovapd (%rdx), %ymm0 +; FMA4-NEXT: vfnmsubpd (%rcx), %ymm0, %ymm0, %ymm0 +; FMA4-NEXT: retq + %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind + ret <4 x double> %res +} + diff --git a/llvm/test/CodeGen/X86/fma4-fneg-combine.ll b/llvm/test/CodeGen/X86/fma4-fneg-combine.ll new file mode 100644 index 00000000000..69f90d1d011 --- /dev/null +++ b/llvm/test/CodeGen/X86/fma4-fneg-combine.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4,-fma | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4,+fma | FileCheck %s + +declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) +declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) + +; TODO this can be negated +define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: test1: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) + %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, 
float -0.000000e+00>, %res + ret <4 x float> %sub.i +} + +define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: test2: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c + %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i) + ret <4 x float> %res +} + +define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: test3: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b + %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c) + ret <4 x float> %res +} + +define <4 x float> @test4(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: test4: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a + %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %sub.i, <4 x float> %b, <4 x float> %c) + ret <4 x float> %res +} + +define <4 x float> @test5(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: test5: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a + %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c + %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %sub.i, <4 x float> %b, <4 x float> %sub.i.2) + ret <4 x float> %res +} + +define <2 x double> @test6(<2 x double> %a, 
<2 x double> %b, <2 x double> %c) { +; CHECK-LABEL: test6: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) + %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %res + ret <2 x double> %sub.i +} + +define <2 x double> @test7(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; CHECK-LABEL: test7: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c + %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i) + ret <2 x double> %res +} + +define <2 x double> @test8(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; CHECK-LABEL: test8: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b + %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %sub.i, <2 x double> %c) + ret <2 x double> %res +} + +define <2 x double> @test9(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; CHECK-LABEL: test9: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a + %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c) + ret <2 x double> %res +} + +define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; CHECK-LABEL: test10: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a + %sub.i.2 = fsub <2 x double> <double -0.000000e+00, double 
-0.000000e+00>, %c + %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %sub.i.2) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll b/llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll new file mode 100644 index 00000000000..0cdf251cfba --- /dev/null +++ b/llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll @@ -0,0 +1,289 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,-fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,+fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK + +; VFMADD +define <4 x float> @test_x86_fma4_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { +; CHECK-LABEL: test_x86_fma4_vfmadd_ss: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + ret <4 x float> %res +} + +define <4 x float> @test_x86_fma4_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { +; CHECK-LABEL: test_x86_fma4_vfmadd_bac_ss: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6a,0xc2,0x00] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2) + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) + +define <2 x double> @test_x86_fma4_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma4_vfmadd_sd: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <2 x 
double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + ret <2 x double> %res +} + +define <2 x double> @test_x86_fma4_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma4_vfmadd_bac_sd: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6b,0xc2,0x00] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2) + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) + +define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmadd_ps: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) + +define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmadd_pd: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) + +define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmadd_ps_256: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> 
@llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) + +define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmadd_pd_256: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) + +; VFMSUB +define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmsub_ps: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) + +define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmsub_pd: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) + +define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmsub_ps_256: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: 
[0xc4,0xe3,0xfd,0x6c,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) + +define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmsub_pd_256: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) + +; VFNMADD +define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfnmadd_ps: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) + +define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfnmadd_pd: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) + +define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { +; CHECK-LABEL: 
test_x86_fma_vfnmadd_ps_256: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) + +define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) + +; VFNMSUB +define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfnmsub_ps: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) + +define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfnmsub_pd: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) + +define <8 
x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) + +define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256: +; CHECK: # BB#0: +; CHECK-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) + +; VFMADDSUB +define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmaddsub_ps: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>) + +define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmaddsub_pd: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) 
+ ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>) + +define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>) + +define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256: +; CHECK: # BB#0: +; CHECK-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>) + +; VFMSUBADD +define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmsubadd_ps: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>) + +define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmsubadd_pd: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: 
[0xc4,0xe3,0xf9,0x5f,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>) + +define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>) + +define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { +; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256: +; CHECK: # BB#0: +; CHECK-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>) + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll b/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll index 5b86a7d7eea..a7f7500afb1 100644 --- a/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll +++ b/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll @@ -3,50 +3,50 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s ; VFMADD -define < 4 x float > @test_x86_fma_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) { -; CHECK-LABEL: test_x86_fma_vfmadd_ss_load: +define < 4 
x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) { +; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load: ; CHECK: # BB#0: ; CHECK-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %x = load float , float *%a2 %y = insertelement <4 x float> undef, float %x, i32 0 - %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) + %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ret < 4 x float > %res } -define < 4 x float > @test_x86_fma_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) { -; CHECK-LABEL: test_x86_fma_vfmadd_ss_load2: +define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) { +; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load2: ; CHECK: # BB#0: ; CHECK-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 ; CHECK-NEXT: retq %x = load float , float *%a1 %y = insertelement <4 x float> undef, float %x, i32 0 - %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) + %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ret < 4 x float > %res } -declare < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone +declare < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone -define < 2 x double > @test_x86_fma_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) { -; CHECK-LABEL: test_x86_fma_vfmadd_sd_load: +define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) { +; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load: ; CHECK: # BB#0: ; CHECK-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %x = load double , double *%a2 %y = insertelement <2 x double> undef, double %x, i32 0 - %res = call < 2 x double > 
@llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) + %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ret < 2 x double > %res } -define < 2 x double > @test_x86_fma_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) { -; CHECK-LABEL: test_x86_fma_vfmadd_sd_load2: +define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) { +; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load2: ; CHECK: # BB#0: ; CHECK-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 ; CHECK-NEXT: retq %x = load double , double *%a1 %y = insertelement <2 x double> undef, double %x, i32 0 - %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) + %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ret < 2 x double > %res } -declare < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone +declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) { ; CHECK-LABEL: test_x86_fma_vfmadd_ps_load: ; CHECK: # BB#0: diff --git a/llvm/test/CodeGen/X86/fma4-scalar-memfold.ll b/llvm/test/CodeGen/X86/fma4-scalar-memfold.ll new file mode 100644 index 00000000000..b43e800795f --- /dev/null +++ b/llvm/test/CodeGen/X86/fma4-scalar-memfold.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mattr=fma4 | FileCheck %s + +target triple = "x86_64-unknown-unknown" + +declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) +declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) + +define void @fmadd_aab_ss(float* %a, float* %b) { +; 
CHECK-LABEL: fmadd_aab_ss: +; CHECK: # BB#0: +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vfmaddss (%rsi), %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vmovss %xmm0, (%rdi) +; CHECK-NEXT: retq + %a.val = load float, float* %a + %av0 = insertelement <4 x float> undef, float %a.val, i32 0 + %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 + %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2 + %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3 + + %b.val = load float, float* %b + %bv0 = insertelement <4 x float> undef, float %b.val, i32 0 + %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1 + %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2 + %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3 + + %vr = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv) + + %sr = extractelement <4 x float> %vr, i32 0 + store float %sr, float* %a + ret void +} + +define void @fmadd_aba_ss(float* %a, float* %b) { +; CHECK-LABEL: fmadd_aba_ss: +; CHECK: # BB#0: +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vfmaddss %xmm0, (%rsi), %xmm0, %xmm0 +; CHECK-NEXT: vmovss %xmm0, (%rdi) +; CHECK-NEXT: retq + %a.val = load float, float* %a + %av0 = insertelement <4 x float> undef, float %a.val, i32 0 + %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 + %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2 + %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3 + + %b.val = load float, float* %b + %bv0 = insertelement <4 x float> undef, float %b.val, i32 0 + %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1 + %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2 + %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3 + + %vr = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av) + + %sr = extractelement <4 x float> 
%vr, i32 0 + store float %sr, float* %a + ret void +} + +define void @fmadd_aab_sd(double* %a, double* %b) { +; CHECK-LABEL: fmadd_aab_sd: +; CHECK: # BB#0: +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vfmaddsd (%rsi), %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vmovlpd %xmm0, (%rdi) +; CHECK-NEXT: retq + %a.val = load double, double* %a + %av0 = insertelement <2 x double> undef, double %a.val, i32 0 + %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 + + %b.val = load double, double* %b + %bv0 = insertelement <2 x double> undef, double %b.val, i32 0 + %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1 + + %vr = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv) + + %sr = extractelement <2 x double> %vr, i32 0 + store double %sr, double* %a + ret void +} + +define void @fmadd_aba_sd(double* %a, double* %b) { +; CHECK-LABEL: fmadd_aba_sd: +; CHECK: # BB#0: +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vfmaddsd %xmm0, (%rsi), %xmm0, %xmm0 +; CHECK-NEXT: vmovlpd %xmm0, (%rdi) +; CHECK-NEXT: retq + %a.val = load double, double* %a + %av0 = insertelement <2 x double> undef, double %a.val, i32 0 + %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 + + %b.val = load double, double* %b + %bv0 = insertelement <2 x double> undef, double %b.val, i32 0 + %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1 + + %vr = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av) + + %sr = extractelement <2 x double> %vr, i32 0 + store double %sr, double* %a + ret void +} + |

