| author | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2015-11-06 23:16:38 +0000 |
|---|---|---|
| committer | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2015-11-06 23:16:38 +0000 |
| commit | 68614a36d114ba390ce97576b71717b8d7c96b6f (patch) | |
| tree | 8d199b08583a68e4ffe4fa1fccfc5078923f994f /llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll | |
| parent | b126f6b6c8ed31a00ef9dacf478138e43c41810f (diff) | |
[X86] Don't fold non-LSB extracts into truncating broadcasts.
We used to assume, incorrectly, that the bit offset we were extracting from
was a multiple of the element size. So, we'd fold:
(v8i16 (shufflevector
(v8i16 (bitcast (v4i32 (build_vector X, Y, ...)))),
<1,1,...,1>))
into:
(v8i16 (vbroadcast (i16 (trunc Y))))
whereas we should have extracted the higher bits from X.
Instead, bail out if the assumption doesn't hold.
llvm-svn: 252361
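
For context on the lane arithmetic: bitcasting a v4i32 to v8i16 splits each i32 into two i16 halves, so on little-endian x86 lane 1 of the v8i16 is the high half of X rather than any whole i32 element. The fold is only sound when the extracted lane's bit offset (lane index × 16) is a multiple of the source element width (32). The IR below is a minimal standalone sketch of that mapping written for this note; the function name is made up and it is not one of the commit's tests:

```llvm
define i16 @elt1_is_high_half_of_x(i32 %x, i32 %y) {
  ; Build <4 x i32> <x, y, undef, undef> and view it as <8 x i16>.
  %v0 = insertelement <4 x i32> undef, i32 %x, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %y, i32 1
  %cast = bitcast <4 x i32> %v1 to <8 x i16>
  ; Lane 1 starts at bit offset 1*16 = 16, which is not a multiple of
  ; 32: on a little-endian target it equals trunc(lshr(%x, 16)), the
  ; high half of %x. The old fold would have produced trunc(%y) instead.
  %elt = extractelement <8 x i16> %cast, i32 1
  ret i16 %elt
}
```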
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll | 142 |
1 file changed, 142 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
index f040a691938..eb69c6e9a33 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -2228,3 +2228,145 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
   %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
   ret <8 x i16> %tmp4
 }
+
+define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
+; SSE2-LABEL: insert_dup_elt1_mem_v8i16_i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_i32:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: insert_dup_elt1_mem_v8i16_i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: insert_dup_elt1_mem_v8i16_i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX-NEXT:    retq
+  %tmp = load i32, i32* %ptr, align 4
+  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
+  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
+; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: insert_dup_elt3_mem_v8i16_i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX-NEXT:    retq
+  %tmp = load i32, i32* %ptr, align 4
+  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
+  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
+  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
+; SSE2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movswl (%rdi), %eax
+; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movswl (%rdi), %eax
+; SSSE3-NEXT:    movd %eax, %xmm0
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movswl (%rdi), %eax
+; SSE41-NEXT:    movd %eax, %xmm0
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
+; AVX:       # BB#0:
+; AVX-NEXT:    movswl (%rdi), %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX-NEXT:    retq
+  %tmp = load i16, i16* %ptr, align 2
+  %tmp1 = sext i16 %tmp to i32
+  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
+  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
+  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i16> %tmp4
+}
+
+define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
+; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movswl (%rdi), %eax
+; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movswl (%rdi), %eax
+; SSSE3-NEXT:    movd %eax, %xmm0
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movswl (%rdi), %eax
+; SSE41-NEXT:    movd %eax, %xmm0
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
+; AVX:       # BB#0:
+; AVX-NEXT:    movswl (%rdi), %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX-NEXT:    retq
+  %tmp = load i16, i16* %ptr, align 2
+  %tmp1 = sext i16 %tmp to i32
+  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
+  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
+  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  ret <8 x i16> %tmp4
+}
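
The four new functions pin down exactly the shapes the old fold mishandled: the broadcast lane (1 or 3) does not start on an i32 boundary, so the checks expect a plain shuffle sequence (pshufd/pshuflw/pshufhw on SSE2, a single pshufb from SSSE3 onward) rather than a truncating broadcast. The file's RUN lines fall outside this hunk; tests like this are typically driven along the following lines (a sketch with assumed -mcpu/-mattr choices, not the file's literal RUN lines):

```llvm
; Hypothetical RUN lines for illustration only; the real ones sit at the
; top of vector-shuffle-128-v8.ll, outside this hunk.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
```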

