path: root/llvm/test
author     Craig Topper <craig.topper@intel.com>    2017-09-05 19:09:02 +0000
committer  Craig Topper <craig.topper@intel.com>    2017-09-05 19:09:02 +0000
commit     784fa8a4e30a6a70a09a1d8008515b43e00104d7 (patch)
tree       9f3152d2fd96c28426beb19db617cc3439c62d3b /llvm/test
parent     80528702c9f54212813231d80c63a9a599e40e60 (diff)
[X86] Remove unnecessary (v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X)))) patterns
We had already disabled this pattern for SSE4.1 and SSE4.2, but it got re-enabled for AVX and AVX512.

With SSE41 we rely on a separate (v4f32 (X86vzmovl VR128)) pattern to select a blendps with an xorps to create the zeroes, and a separate (v4f32 (scalar_to_vector FR32X)) pattern to select a COPY_TO_REGCLASS that moves the FR32 into a VR128. The same thing can happen for AVX with vblendps, and those separate patterns already exist.

For AVX512, (v4f32 (X86vzmovl VR128)) will select a VMOVSS instruction instead of VBLENDPS because there is no EVEX VBLENDPS. This is what we were getting out of the larger pattern anyway, so the larger pattern is unneeded for AVX512 too.

For SSE1-SSSE3 we can rely on (v4f32 (X86vzmovl VR128)) selecting a MOVSS, similar to AVX512. Again, this is what the larger pattern did too.

So the only real change here is that AVX1/2 now properly outputs a VBLENDPS during isel instead of a VMOVSS, matching SSE41. Most tests didn't notice because the two-address instruction pass knows how to turn a VMOVSS into a VBLENDPS to get an independent destination register.

llvm-svn: 312564
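For illustration (not part of the original commit message), IR along the following lines is the kind of input that reaches the pattern in question: inserting a scalar float into lane 0 of an otherwise-zero vector is selected through a (v4f32 (X86vzmovl (v4f32 (scalar_to_vector ...)))) node, and per the commit message AVX1/2 now select it as a vxorps plus vblendps rather than a vmovss. The function name below is made up for the example.

define <4 x float> @insert_scalar_into_zero(float %x) nounwind {
  ; %x goes into lane 0; lanes 1-3 stay zero, matching the X86vzmovl zero-extend semantics
  %v = insertelement <4 x float> zeroinitializer, float %x, i32 0
  ret <4 x float> %v
}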
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/X86/vec_ss_load_fold.ll  |  8
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
index a74a4ed36d7..1010f97ccdb 100644
--- a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -35,9 +35,9 @@ define i16 @test1(float %f) nounwind {
; X32_AVX1-LABEL: test1:
; X32_AVX1: ## BB#0:
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32_AVX1-NEXT: vaddss LCPI0_0, %xmm0, %xmm0
; X32_AVX1-NEXT: vmulss LCPI0_1, %xmm0, %xmm0
+; X32_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32_AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X32_AVX1-NEXT: vminss LCPI0_2, %xmm0, %xmm0
; X32_AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0
@@ -47,9 +47,9 @@ define i16 @test1(float %f) nounwind {
;
; X64_AVX1-LABEL: test1:
; X64_AVX1: ## BB#0:
-; X64_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64_AVX1-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX1-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64_AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64_AVX1-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0
@@ -60,9 +60,9 @@ define i16 @test1(float %f) nounwind {
; X32_AVX512-LABEL: test1:
; X32_AVX512: ## BB#0:
; X32_AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32_AVX512-NEXT: vaddss LCPI0_0, %xmm0, %xmm0
; X32_AVX512-NEXT: vmulss LCPI0_1, %xmm0, %xmm0
+; X32_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32_AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X32_AVX512-NEXT: vminss LCPI0_2, %xmm0, %xmm0
; X32_AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
@@ -72,9 +72,9 @@ define i16 @test1(float %f) nounwind {
;
; X64_AVX512-LABEL: test1:
; X64_AVX512: ## BB#0:
-; X64_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64_AVX512-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64_AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64_AVX512-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0