| author | Sanjay Patel <spatel@rotateright.com> | 2015-04-02 20:21:52 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2015-04-02 20:21:52 +0000 |
| commit | eca590ffb3b2f8d73fa93e66dea5b2c380a527df (patch) | |
| tree | dd20212b5674d73ed088d921d641df3e8d83cb09 /llvm/test/CodeGen | |
| parent | ff0cf4f56df4177032c6357910ca50d33b77bfcd (diff) | |
[AVX] Improve insertion of i8 or i16 into low element of 256-bit zero vector
Without this patch, we split the 256-bit vector into halves and produced something like:
movzwl (%rdi), %eax
vmovd %eax, %xmm0
vxorps %xmm1, %xmm1, %xmm1
vblendps $15, %ymm0, %ymm1, %ymm0 ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
Now, we eliminate the xor and blend because those zeros are free with the vmovd:
movzwl (%rdi), %eax
vmovd %eax, %xmm0
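As an illustration (not part of this commit's message or tests; the function name below is made up), the analogous i8 case from the title would look like this in IR, and with this patch it should likewise lower to just a zero-extending load plus vmovd:

```llvm
; Hypothetical i8 variant, assumed from the commit title; the i16 version is
; the one actually added as a test in this patch.
define <32 x i8> @insert_v32i8_0elt_into_zero_vector(i8* %ptr) {
  ; Insert one loaded byte into element 0 of an all-zero 256-bit vector.
  %val = load i8, i8* %ptr
  %i0 = insertelement <32 x i8> zeroinitializer, i8 %val, i32 0
  ret <32 x i8> %i0
}
```

The upper bits are already zero after the vmovd, so no explicit xor or blend of the high half should be needed.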
This should be the final fix needed to resolve PR22685:
https://llvm.org/bugs/show_bug.cgi?id=22685
llvm-svn: 233941
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll | 12 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll | 2 |
2 files changed, 12 insertions, 2 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
index aad37022d27..df4994da693 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -3249,3 +3249,15 @@ define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_u
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
   ret <16 x i16> %shuffle
 }
+
+define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
+; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
+; ALL: # BB#0:
+; ALL-NEXT: movzwl (%rdi), %eax
+; ALL-NEXT: vmovd %eax, %xmm0
+; ALL-NEXT: retq
+  %val = load i16, i16* %ptr
+  %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0
+  ret <16 x i16> %i0
+}
+
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index f9f4b96be3c..a0f43de7563 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -656,8 +656,6 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
 ; AVX2-NEXT: movl $15, %eax
 ; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpblendd $15, %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
```
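For context, tests like the ones touched above are driven by RUN lines at the top of each .ll file and verified with FileCheck against prefixes such as the ALL and AVX2 prefixes visible in the diff. A rough sketch of what those RUN lines typically look like (the exact triple, attributes, and prefix names here are assumptions, not copied from these files):

```llvm
; Assumed RUN lines, for illustration only; see the actual test files for
; the exact invocations.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
```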

