diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-07-05 06:52:55 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-07-05 06:52:55 +0000 |
| commit | 350c5f1881a8387119545a90daced0e6cce1165d (patch) | |
| tree | 5f9c222714bb7403ac6e3dc24041c63c6c357b00 /llvm/test/CodeGen/X86 | |
| parent | 9d70afbb31264a9717065bb8595c6e565ef065e3 (diff) | |
| download | bcm5719-llvm-350c5f1881a8387119545a90daced0e6cce1165d.tar.gz bcm5719-llvm-350c5f1881a8387119545a90daced0e6cce1165d.zip | |
[X86] Remove X86 specific scalar FMA intrinsics and upgrade to target independent FMA and extractelement/insertelement.
llvm-svn: 336315
Diffstat (limited to 'llvm/test/CodeGen/X86')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fma-commute-x86.ll | 32 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fma-intrinsics-x86.ll | 112 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fma-scalar-memfold.ll | 40 |
4 files changed, 104 insertions, 88 deletions
diff --git a/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll b/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll index 9ebc3b8941b..4ab0af0e8d1 100644 --- a/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll +++ b/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll @@ -43,16 +43,16 @@ declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; X32-LABEL: test3: ; X32: # %bb.0: # %entry -; X32-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0] ; X32-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 -; X32-NEXT: vxorps %xmm3, %xmm0, %xmm0 +; X32-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0] +; X32-NEXT: vxorps %xmm1, %xmm0, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: test3: ; X64: # %bb.0: # %entry -; X64-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0] ; X64-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 -; X64-NEXT: vxorps %xmm3, %xmm0, %xmm0 +; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0] +; X64-NEXT: vxorps %xmm1, %xmm0, %xmm0 ; X64-NEXT: retq entry: %0 = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2 diff --git a/llvm/test/CodeGen/X86/fma-commute-x86.ll b/llvm/test/CodeGen/X86/fma-commute-x86.ll index 6048d69ac42..64180f64110 100644 --- a/llvm/test/CodeGen/X86/fma-commute-x86.ll +++ b/llvm/test/CodeGen/X86/fma-commute-x86.ll @@ -9,9 +9,9 @@ declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-LABEL: test_x86_fmadd_baa_ss: ; FMA: # %bb.0: -; FMA-NEXT: vmovaps (%rcx), %xmm1 ; FMA-NEXT: vmovaps (%rdx), %xmm0 -; FMA-NEXT: vfmadd213ss %xmm1, %xmm1, %xmm0 +; FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; FMA-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind ret <4 x 
float> %res @@ -103,9 +103,9 @@ declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x do define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 { ; FMA-LABEL: test_x86_fmadd_baa_sd: ; FMA: # %bb.0: -; FMA-NEXT: vmovapd (%rcx), %xmm1 ; FMA-NEXT: vmovapd (%rdx), %xmm0 -; FMA-NEXT: vfmadd213sd %xmm1, %xmm1, %xmm0 +; FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; FMA-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; FMA-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind ret <2 x double> %res @@ -198,9 +198,9 @@ declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x floa define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-LABEL: test_x86_fnmadd_baa_ss: ; FMA: # %bb.0: -; FMA-NEXT: vmovaps (%rcx), %xmm1 ; FMA-NEXT: vmovaps (%rdx), %xmm0 -; FMA-NEXT: vfnmadd213ss %xmm1, %xmm1, %xmm0 +; FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; FMA-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 ; FMA-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind ret <4 x float> %res @@ -292,9 +292,9 @@ declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x d define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 { ; FMA-LABEL: test_x86_fnmadd_baa_sd: ; FMA: # %bb.0: -; FMA-NEXT: vmovapd (%rcx), %xmm1 ; FMA-NEXT: vmovapd (%rdx), %xmm0 -; FMA-NEXT: vfnmadd213sd %xmm1, %xmm1, %xmm0 +; FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; FMA-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 ; FMA-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind ret <2 x double> %res @@ -386,9 +386,9 @@ declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float define <4 x float> @test_x86_fmsub_baa_ss(<4 x 
float> %a, <4 x float> %b) #0 { ; FMA-LABEL: test_x86_fmsub_baa_ss: ; FMA: # %bb.0: -; FMA-NEXT: vmovaps (%rcx), %xmm1 ; FMA-NEXT: vmovaps (%rdx), %xmm0 -; FMA-NEXT: vfmsub213ss %xmm1, %xmm1, %xmm0 +; FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; FMA-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; FMA-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind ret <4 x float> %res @@ -480,9 +480,9 @@ declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x do define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 { ; FMA-LABEL: test_x86_fmsub_baa_sd: ; FMA: # %bb.0: -; FMA-NEXT: vmovapd (%rcx), %xmm1 ; FMA-NEXT: vmovapd (%rdx), %xmm0 -; FMA-NEXT: vfmsub213sd %xmm1, %xmm1, %xmm0 +; FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; FMA-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; FMA-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind ret <2 x double> %res @@ -575,9 +575,9 @@ declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x floa define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 { ; FMA-LABEL: test_x86_fnmsub_baa_ss: ; FMA: # %bb.0: -; FMA-NEXT: vmovaps (%rcx), %xmm1 ; FMA-NEXT: vmovaps (%rdx), %xmm0 -; FMA-NEXT: vfnmsub213ss %xmm1, %xmm1, %xmm0 +; FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; FMA-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 ; FMA-NEXT: retq %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind ret <4 x float> %res @@ -669,9 +669,9 @@ declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x d define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 { ; FMA-LABEL: test_x86_fnmsub_baa_sd: ; FMA: # %bb.0: -; FMA-NEXT: vmovapd (%rcx), %xmm1 ; FMA-NEXT: vmovapd (%rdx), %xmm0 -; 
FMA-NEXT: vfnmsub213sd %xmm1, %xmm1, %xmm0 +; FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; FMA-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 ; FMA-NEXT: retq %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind ret <2 x double> %res diff --git a/llvm/test/CodeGen/X86/fma-intrinsics-x86.ll b/llvm/test/CodeGen/X86/fma-intrinsics-x86.ll index c3d454c8ed9..5e30726555e 100644 --- a/llvm/test/CodeGen/X86/fma-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/fma-intrinsics-x86.ll @@ -19,10 +19,11 @@ define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a] ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01] -; CHECK-FMA-WIN-NEXT: vfmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa9,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem +; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x02] +; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res @@ -45,10 +46,11 @@ define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_bac_ss: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09] ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] -; CHECK-FMA-WIN-NEXT: vfmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa9,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem +; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # 
encoding: [0xc4,0xc1,0x7a,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x01] +; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2) ret <4 x float> %res @@ -70,10 +72,11 @@ define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_sd: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0a] ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01] -; CHECK-FMA-WIN-NEXT: vfmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa9,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem +; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero +; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x02] +; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res @@ -96,10 +99,11 @@ define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> % ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_bac_sd: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09] ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] -; CHECK-FMA-WIN-NEXT: vfmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa9,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem +; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero +; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0 # 
encoding: [0xc4,0xe2,0xf1,0x99,0x01] +; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2) ret <2 x double> %res @@ -222,10 +226,11 @@ define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ss: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a] ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01] -; CHECK-FMA-WIN-NEXT: vfmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xab,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem +; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x02] +; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res @@ -248,10 +253,11 @@ define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_bac_ss: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09] ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] -; CHECK-FMA-WIN-NEXT: vfmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xab,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem +; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x01] +; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: 
[0xc3] %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2) ret <4 x float> %res @@ -273,10 +279,11 @@ define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_sd: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0a] ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01] -; CHECK-FMA-WIN-NEXT: vfmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xab,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem +; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero +; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x02] +; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res @@ -299,10 +306,11 @@ define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> % ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_bac_sd: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09] ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] -; CHECK-FMA-WIN-NEXT: vfmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xab,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem +; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero +; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x01] +; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2) ret <2 x double> %res @@ -425,10 +433,11 
@@ define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ss: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a] ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01] -; CHECK-FMA-WIN-NEXT: vfnmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xad,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem +; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x02] +; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res @@ -451,10 +460,11 @@ define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1 ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_bac_ss: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09] ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] -; CHECK-FMA-WIN-NEXT: vfnmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xad,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem +; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x01] +; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2) ret <4 x float> %res @@ -476,10 +486,11 @@ define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, ; ; CHECK-FMA-WIN-LABEL: 
test_x86_fma_vfnmadd_sd: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0a] ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01] -; CHECK-FMA-WIN-NEXT: vfnmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xad,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem +; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero +; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x02] +; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res @@ -502,10 +513,11 @@ define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_bac_sd: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09] ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] -; CHECK-FMA-WIN-NEXT: vfnmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xad,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem +; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero +; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x01] +; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2) ret <2 x double> %res @@ -628,10 +640,11 @@ define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ss: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a] ; 
CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01] -; CHECK-FMA-WIN-NEXT: vfnmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaf,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem +; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x02] +; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ret <4 x float> %res @@ -654,10 +667,11 @@ define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1 ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_bac_ss: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09] ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] -; CHECK-FMA-WIN-NEXT: vfnmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaf,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem +; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x01] +; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2) ret <4 x float> %res @@ -679,10 +693,11 @@ define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_sd: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0a] ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01] -; CHECK-FMA-WIN-NEXT: vfnmsub213sd (%r8), 
%xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaf,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem +; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero +; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x02] +; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ret <2 x double> %res @@ -705,10 +720,11 @@ define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_bac_sd: ; CHECK-FMA-WIN: # %bb.0: -; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09] ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] -; CHECK-FMA-WIN-NEXT: vfnmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaf,0x00] -; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem +; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] +; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero +; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x01] +; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2) ret <2 x double> %res diff --git a/llvm/test/CodeGen/X86/fma-scalar-memfold.ll b/llvm/test/CodeGen/X86/fma-scalar-memfold.ll index 016a78a8dd3..0cdf2707664 100644 --- a/llvm/test/CodeGen/X86/fma-scalar-memfold.ll +++ b/llvm/test/CodeGen/X86/fma-scalar-memfold.ll @@ -44,7 +44,7 @@ define void @fmadd_aba_ss(float* %a, float* %b) { ; CHECK-LABEL: fmadd_aba_ss: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vfmadd132ss (%rsi), %xmm0, %xmm0 +; CHECK-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm0 * 
mem) + xmm0 ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load float, float* %a @@ -96,7 +96,7 @@ define void @fmsub_aba_ss(float* %a, float* %b) { ; CHECK-LABEL: fmsub_aba_ss: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vfmsub132ss (%rsi), %xmm0, %xmm0 +; CHECK-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm0 ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load float, float* %a @@ -148,7 +148,7 @@ define void @fnmadd_aba_ss(float* %a, float* %b) { ; CHECK-LABEL: fnmadd_aba_ss: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vfnmadd132ss (%rsi), %xmm0, %xmm0 +; CHECK-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0 ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load float, float* %a @@ -200,7 +200,7 @@ define void @fnmsub_aba_ss(float* %a, float* %b) { ; CHECK-LABEL: fnmsub_aba_ss: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vfnmsub132ss (%rsi), %xmm0, %xmm0 +; CHECK-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0 ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load float, float* %a @@ -226,8 +226,8 @@ define void @fmadd_aab_sd(double* %a, double* %b) { ; CHECK-LABEL: fmadd_aab_sd: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vfmadd213sd (%rsi), %xmm0, %xmm0 -; CHECK-NEXT: vmovlpd %xmm0, (%rdi) +; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem +; CHECK-NEXT: vmovsd %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 @@ -248,8 +248,8 @@ define void @fmadd_aba_sd(double* %a, double* %b) { ; CHECK-LABEL: fmadd_aba_sd: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vfmadd132sd (%rsi), %xmm0, %xmm0 -; CHECK-NEXT: vmovlpd %xmm0, (%rdi) +; CHECK-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm0 * mem) + 
xmm0 +; CHECK-NEXT: vmovsd %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 @@ -270,8 +270,8 @@ define void @fmsub_aab_sd(double* %a, double* %b) { ; CHECK-LABEL: fmsub_aab_sd: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vfmsub213sd (%rsi), %xmm0, %xmm0 -; CHECK-NEXT: vmovlpd %xmm0, (%rdi) +; CHECK-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem +; CHECK-NEXT: vmovsd %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 @@ -292,8 +292,8 @@ define void @fmsub_aba_sd(double* %a, double* %b) { ; CHECK-LABEL: fmsub_aba_sd: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vfmsub132sd (%rsi), %xmm0, %xmm0 -; CHECK-NEXT: vmovlpd %xmm0, (%rdi) +; CHECK-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0 +; CHECK-NEXT: vmovsd %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 @@ -314,8 +314,8 @@ define void @fnmadd_aab_sd(double* %a, double* %b) { ; CHECK-LABEL: fnmadd_aab_sd: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vfnmadd213sd (%rsi), %xmm0, %xmm0 -; CHECK-NEXT: vmovlpd %xmm0, (%rdi) +; CHECK-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem +; CHECK-NEXT: vmovsd %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 @@ -336,8 +336,8 @@ define void @fnmadd_aba_sd(double* %a, double* %b) { ; CHECK-LABEL: fnmadd_aba_sd: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vfnmadd132sd (%rsi), %xmm0, %xmm0 -; CHECK-NEXT: vmovlpd %xmm0, (%rdi) +; CHECK-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0 +; CHECK-NEXT: vmovsd %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x 
double> undef, double %a.val, i32 0 @@ -358,8 +358,8 @@ define void @fnmsub_aab_sd(double* %a, double* %b) { ; CHECK-LABEL: fnmsub_aab_sd: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vfnmsub213sd (%rsi), %xmm0, %xmm0 -; CHECK-NEXT: vmovlpd %xmm0, (%rdi) +; CHECK-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem +; CHECK-NEXT: vmovsd %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 @@ -380,8 +380,8 @@ define void @fnmsub_aba_sd(double* %a, double* %b) { ; CHECK-LABEL: fnmsub_aba_sd: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vfnmsub132sd (%rsi), %xmm0, %xmm0 -; CHECK-NEXT: vmovlpd %xmm0, (%rdi) +; CHECK-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0 +; CHECK-NEXT: vmovsd %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load double, double* %a %av0 = insertelement <2 x double> undef, double %a.val, i32 0 |

