| field | value | date |
|---|---|---|
| author | Craig Topper <craig.topper@intel.com> | 2018-07-12 00:54:40 +0000 |
| committer | Craig Topper <craig.topper@intel.com> | 2018-07-12 00:54:40 +0000 |
| commit | be996bd2d9e73d365c395618dd0d81a2930ec650 (patch) | |
| tree | fda36d6af3af951c5c7a6c4d4a36062232f42182 /llvm/test | |
| parent | 1c8234f639d8e9c5a80a154e395069c4d6c32704 (diff) | |
[X86] Add patterns to use VMOVSS/SD zero masking for scalar f32/f64 select with zero.
These patterns showed up in some of the auto-upgraded FMA code. The test cases still need further improvement, but this helps for now.
llvm-svn: 336875
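
The "upgraded FMA code" refers to masked scalar FMA intrinsics that the IR auto-upgrader rewrites into, roughly, an extract/select/insert sequence, where the scalar result is selected against +0.0 under bit 0 of the i8 mask. Below is a minimal sketch of that shape; the function `@fma_ss_maskz` is a hypothetical reduction for illustration, not a test from this patch. With the new patterns, `llc -mtriple=x86_64 -mattr=+avx512f` can lower the final select to a single zero-masking `vmovss` instead of zeroing a scratch register with `vxorps` and merge-masking into it:

```llvm
; Hypothetical reduced example (not from this patch), assuming the usual
; shape of auto-upgraded masked scalar FMA code.
declare float @llvm.fma.f32(float, float, float)

define <4 x float> @fma_ss_maskz(<4 x float> %a, <4 x float> %b, i8 %mask) {
  %x = extractelement <4 x float> %a, i64 0
  %y = extractelement <4 x float> %b, i64 0
  %fma = call float @llvm.fma.f32(float %x, float %y, float %x)
  ; Extract bit 0 of the i8 mask, as the auto-upgrader does.
  %mask.vec = bitcast i8 %mask to <8 x i1>
  %m = extractelement <8 x i1> %mask.vec, i64 0
  ; This select-with-zero is the pattern the new zero-masking rules target:
  ; it can become "vmovss %xmm0, %xmm0, %xmm0 {%k1} {z}" rather than a
  ; vxorps plus a merge-masking vmovss into a separate register.
  %sel = select i1 %m, float %fma, float 0.000000e+00
  %res = insertelement <4 x float> %a, float %sel, i64 0
  ret <4 x float> %res
}
```

Whether the zero-masking form actually fires depends on how the mask reaches a k-register, so treat this as an illustration of the motivating pattern rather than a guaranteed lowering.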
Diffstat (limited to 'llvm/test')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 20 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 10 |
2 files changed, 12 insertions, 18 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index a37129aaf69..55049ead617 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -9083,9 +9083,8 @@ define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
 ; X86-NEXT:    vfmadd231ss (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x01]
 ; X86-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
-; X86-NEXT:    vmovss %xmm0, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc8]
-; X86-NEXT:    vmovss %xmm1, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x0a]
+; X86-NEXT:    vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
+; X86-NEXT:    vmovss %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x02]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-LABEL: fmadd_ss_maskz_memfold:
@@ -9095,9 +9094,8 @@ define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
 ; X64-NEXT:    vfmadd231ss (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x06]
 ; X64-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
 ; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
-; X64-NEXT:    vmovss %xmm0, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc8]
-; X64-NEXT:    vmovss %xmm1, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x0f]
+; X64-NEXT:    vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
+; X64-NEXT:    vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %a.val = load float, float* %a
   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
@@ -9173,9 +9171,8 @@ define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
 ; X86-NEXT:    vfmadd231sd (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x01]
 ; X86-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
-; X86-NEXT:    vmovsd %xmm0, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc8]
-; X86-NEXT:    vmovsd %xmm1, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x0a]
+; X86-NEXT:    vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
+; X86-NEXT:    vmovsd %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x02]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-LABEL: fmadd_sd_maskz_memfold:
@@ -9185,9 +9182,8 @@ define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
 ; X64-NEXT:    vfmadd231sd (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
 ; X64-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
 ; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; X64-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
-; X64-NEXT:    vmovsd %xmm0, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc8]
-; X64-NEXT:    vmovsd %xmm1, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x0f]
+; X64-NEXT:    vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
+; X64-NEXT:    vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %a.val = load double, double* %a
   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index c538972266b..1b3066086b0 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -4629,9 +4629,8 @@ define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
 ; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
 ; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vmovss %xmm0, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT:    vmovss %xmm1, (%rdi)
+; CHECK-NEXT:    vmovss %xmm0, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vmovss %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
   %a.val = load float, float* %a
   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
@@ -4693,9 +4692,8 @@ define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
 ; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
 ; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vmovsd %xmm0, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT:    vmovsd %xmm1, (%rdi)
+; CHECK-NEXT:    vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vmovsd %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
   %a.val = load double, double* %a
   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
```

