diff options
-rw-r--r-- | llvm/test/CodeGen/X86/build-vector-128.ll | 48 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/movddup-load-fold.ll | 26 |
2 files changed, 26 insertions, 48 deletions
diff --git a/llvm/test/CodeGen/X86/build-vector-128.ll b/llvm/test/CodeGen/X86/build-vector-128.ll index d859be16083..9702e4be22a 100644 --- a/llvm/test/CodeGen/X86/build-vector-128.ll +++ b/llvm/test/CodeGen/X86/build-vector-128.ll @@ -508,54 +508,6 @@ define <4 x i32> @test_buildvector_v4i32_splat_zext_i8(i8 %in) { ret <4 x i32> %splat } -define <4 x float> @movddup_load_fold(float %x, float %y) { -; SSE2-32-LABEL: movddup_load_fold: -; SSE2-32: # %bb.0: -; SSE2-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE2-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] -; SSE2-32-NEXT: retl -; -; SSE2-64-LABEL: movddup_load_fold: -; SSE2-64: # %bb.0: -; SSE2-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] -; SSE2-64-NEXT: retq -; -; SSE41-32-LABEL: movddup_load_fold: -; SSE41-32: # %bb.0: -; SSE41-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE41-32-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] -; SSE41-32-NEXT: retl -; -; SSE41-64-LABEL: movddup_load_fold: -; SSE41-64: # %bb.0: -; SSE41-64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; SSE41-64-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] -; SSE41-64-NEXT: retq -; -; AVX-32-LABEL: movddup_load_fold: -; AVX-32: # %bb.0: -; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; AVX-32-NEXT: retl -; -; AVX1-64-LABEL: movddup_load_fold: -; AVX1-64: # %bb.0: -; AVX1-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX1-64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; AVX1-64-NEXT: retq -; -; AVX2-64-LABEL: movddup_load_fold: -; AVX2-64: # %bb.0: -; AVX2-64-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX2-64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; AVX2-64-NEXT: retq - %i0 = insertelement <4 x float> zeroinitializer, float %x, i32 0 - %i1 = insertelement <4 x float> %i0, float %y, i32 1 - %dup = shufflevector <4 x float> %i1, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> - ret <4 x float> %dup -} - ; PR37502 - https://bugs.llvm.org/show_bug.cgi?id=37502 ; Don't use a series of insertps when movddup will do. diff --git a/llvm/test/CodeGen/X86/movddup-load-fold.ll b/llvm/test/CodeGen/X86/movddup-load-fold.ll new file mode 100644 index 00000000000..a0e65fb892d --- /dev/null +++ b/llvm/test/CodeGen/X86/movddup-load-fold.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 +; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=i686-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=i686-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VL + +; Test an isel pattern for a splatted VZLOAD. + +define <4 x float> @movddup_load_fold(float %x, float %y) { +; SSE-LABEL: movddup_load_fold: +; SSE: # %bb.0: +; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] +; SSE-NEXT: retl +; +; AVX-LABEL: movddup_load_fold: +; AVX: # %bb.0: +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; AVX-NEXT: retl + %i0 = insertelement <4 x float> zeroinitializer, float %x, i32 0 + %i1 = insertelement <4 x float> %i0, float %y, i32 1 + %dup = shufflevector <4 x float> %i1, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> + ret <4 x float> %dup +} + |