|  |  |  |
|---|---|---|
| author | Craig Topper <craig.topper@intel.com> | 2018-07-05 02:52:56 +0000 |
| committer | Craig Topper <craig.topper@intel.com> | 2018-07-05 02:52:56 +0000 |
| commit | 95eb88abfe17c1b1faeb5252554117d45cb2f50e (patch) | |
| tree | afc6a1450af5ffd6775b995d74a20436d2e77c9d /llvm/test/CodeGen/X86 | |
| parent | e4b9257b697e89377964cc3a72225e94f35025eb (diff) | |
| download | bcm5719-llvm-95eb88abfe17c1b1faeb5252554117d45cb2f50e.tar.gz bcm5719-llvm-95eb88abfe17c1b1faeb5252554117d45cb2f50e.zip | |
[X86] Add support for combining FMSUB/FNMADD/FNMSUB ISD nodes with an fneg input.
Previously we could only negate the FMADD opcodes. This was mostly OK when we lowered the FMA intrinsics during lowering. But with the move to llvm.fma from target-specific intrinsics, we can combine (fneg (fma)) into (fmsub) earlier. So if we start with (fneg (fma (fneg))), we would get stuck at (fmsub (fneg)).
This patch fixes that so we can also combine things like (fmsub (fneg)).
llvm-svn: 336304
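
For illustration, here is a minimal, hypothetical IR sketch (not part of this patch; the function name and the use of the generic llvm.fma intrinsic are assumptions) of the pattern described above: an fma whose addend is negated, wrapped in an outer negation. Before this change the outer fneg folded into an FMSUB/FNMSUB node but the inner fneg was left behind as a separate vxorps; with FMSUB/FNMADD/FNMSUB now handled by the combine, the whole expression can lower to a single vfnmadd.

```llvm
; Hypothetical reduced example (not taken from the patch): same shape as the
; test4/test7 tests below, written with llvm.fma instead of the target
; intrinsic.  %negc makes the fma equivalent to an FMSUB node, and the final
; negation is the outer fneg.  Previously the inner fneg survived as a vxorps
; feeding an FNMSUB; with this combine the whole thing can become one FNMADD.
define <8 x float> @fneg_fma_sketch(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  ; negc = -c
  %negc = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
  ; fma = a*b + (-c)  ==  a*b - c   (FMSUB shape)
  %fma = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc)
  ; neg = -(a*b - c)  ==  -(a*b) + c   (FNMADD shape)
  %neg = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %fma
  ret <8 x float> %neg
}

declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
```

The test diffs below show exactly this effect on the existing test4/test7 functions: the vbroadcastss/vxorps + vfnmsub213ps sequence collapses to a single vfnmadd213ps.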
Diffstat (limited to 'llvm/test/CodeGen/X86')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll | 8 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll | 28 |
| -rw-r--r-- | llvm/test/CodeGen/X86/fma-fneg-combine.ll | 16 |
3 files changed, 14 insertions, 38 deletions
diff --git a/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll b/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll
index f408f66eef6..9ebc3b8941b 100644
--- a/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll
+++ b/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll
@@ -65,16 +65,12 @@ declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4
 define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
 ; X32-LABEL: test4:
 ; X32: # %bb.0: # %entry
-; X32-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0,-0,-0,-0,-0,-0,-0,-0]
-; X32-NEXT: vxorps %ymm3, %ymm2, %ymm2
-; X32-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
+; X32-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test4:
 ; X64: # %bb.0: # %entry
-; X64-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0,-0,-0,-0,-0,-0,-0,-0]
-; X64-NEXT: vxorps %ymm3, %ymm2, %ymm2
-; X64-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
+; X64-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
 ; X64-NEXT: retq
 entry:
   %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll
index c33ae5e6467..b020fdd01ea 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll
@@ -2507,25 +2507,17 @@ define <4 x float> @test_mm_mask3_fnmsub_round_ss(<4 x float> %__W, <4 x float>
 ; X86-LABEL: test_mm_mask3_fnmsub_round_ss:
 ; X86: ## %bb.0: ## %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
-; X86-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0]
-; X86-NEXT: ## encoding: [0xc4,0xe2,0x79,0x18,0x1d,A,A,A,A]
-; X86-NEXT: ## fixup A - offset: 5, value: LCPI119_0, kind: FK_Data_4
-; X86-NEXT: vxorps %xmm3, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xcb]
 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1]
-; X86-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
+; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1]
+; X86-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
 ; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
 ; X86-NEXT: retl ## encoding: [0xc3]
 ;
 ; X64-LABEL: test_mm_mask3_fnmsub_round_ss:
 ; X64: ## %bb.0: ## %entry
-; X64-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0]
-; X64-NEXT: ## encoding: [0xc4,0xe2,0x79,0x18,0x1d,A,A,A,A]
-; X64-NEXT: ## fixup A - offset: 5, value: LCPI119_0-4, kind: reloc_riprel_4byte
-; X64-NEXT: vxorps %xmm3, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xcb]
 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1]
-; X64-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
+; X64-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1]
+; X64-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
 ; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
 ; X64-NEXT: retq ## encoding: [0xc3]
 entry:
@@ -3133,21 +3125,17 @@ define <2 x double> @test_mm_mask3_fnmsub_round_sd(<2 x double> %__W, <2 x doubl
 ; X86-LABEL: test_mm_mask3_fnmsub_round_sd:
 ; X86: ## %bb.0: ## %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
-; X86-NEXT: vxorpd LCPI143_0, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0x0d,A,A,A,A]
-; X86-NEXT: ## fixup A - offset: 4, value: LCPI143_0, kind: FK_Data_4
 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1]
-; X86-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
+; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
+; X86-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
 ; X86-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
 ; X86-NEXT: retl ## encoding: [0xc3]
 ;
 ; X64-LABEL: test_mm_mask3_fnmsub_round_sd:
 ; X64: ## %bb.0: ## %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0x0d,A,A,A,A]
-; X64-NEXT: ## fixup A - offset: 4, value: LCPI143_0-4, kind: reloc_riprel_4byte
 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1]
-; X64-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
+; X64-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
+; X64-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
 ; X64-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
 ; X64-NEXT: retq ## encoding: [0xc3]
 entry:
diff --git a/llvm/test/CodeGen/X86/fma-fneg-combine.ll b/llvm/test/CodeGen/X86/fma-fneg-combine.ll
index 1d698c77438..6d02eaec36f 100644
--- a/llvm/test/CodeGen/X86/fma-fneg-combine.ll
+++ b/llvm/test/CodeGen/X86/fma-fneg-combine.ll
@@ -78,18 +78,10 @@ entry:
 define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
-; SKX-LABEL: test7:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm2, %ymm2
-; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
-; SKX-NEXT: retq
-;
-; KNL-LABEL: test7:
-; KNL: # %bb.0: # %entry
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0,-0,-0,-0,-0,-0,-0,-0]
-; KNL-NEXT: vxorps %ymm3, %ymm2, %ymm2
-; KNL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
-; KNL-NEXT: retq
+; CHECK-LABEL: test7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; CHECK-NEXT: retq
 entry:
   %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
   %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0

