summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll
diff options
context:
space:
mode:
authorAyman Musa <ayman.musa@intel.com>2017-09-03 13:53:44 +0000
committerAyman Musa <ayman.musa@intel.com>2017-09-03 13:53:44 +0000
commitef8f61bce6687c86e2abfcb943a7f5f29ec1911a (patch)
tree82ff8230a1824aca5d2cfe6f6e7197fb5da6a5cf /llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll
parent29a5ee358e541acf9cd6427465a691fbccc84483 (diff)
downloadbcm5719-llvm-ef8f61bce6687c86e2abfcb943a7f5f29ec1911a.tar.gz
bcm5719-llvm-ef8f61bce6687c86e2abfcb943a7f5f29ec1911a.zip
[X86][AVX512] Add simple tests for all AVX512 shuffle instructions.
Throughout an effort to strongly check the behavior of CodeGen with the IR shufflevector instruction we generated many tests while predicting the best X86 sequence that may be generated. This is a subset of the generated tests that we think may add value to our X86 set of tests.
Some of the checks are not optimal and will be changed after fixing:
1. PR34394
2. PR34382
3. PR34380
4. PR34359
Differential Revision: https://reviews.llvm.org/D37329
llvm-svn: 312442
Diffstat (limited to 'llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll')
-rw-r--r--llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll2623
1 files changed, 2623 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll
new file mode 100644
index 00000000000..f0bbcba5598
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll
@@ -0,0 +1,2623 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; The target triple is pinned because the assertions below expect SysV x86-64 ELF output (first integer argument in %edi, '#' assembly comments, unprefixed labels); without it llc would fall back to the host's default triple.
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s
+
+; Plain broadcast: replicate the scalar i8 %s into every lane of the <16 x i8> result.
+define <16 x i8> @test_i8_to_16(i8 %s) {
+; CHECK-LABEL: test_i8_to_16:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastb %edi, %xmm0
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %splat
+}
+; Merge-masked i8 broadcast to 16 lanes (pattern 0): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <16 x i8> @test_masked_i8_to_16_mask0(i8 %s, <16 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_16_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-6675, %ax # imm = 0xE5ED
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1>, <16 x i8> %splat, <16 x i8> %default
+ ret <16 x i8> %blend
+}
+
+; Zero-masked i8 broadcast to 16 lanes (pattern 0): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <16 x i8> @test_masked_z_i8_to_16_mask0(i8 %s) {
+; CHECK-LABEL: test_masked_z_i8_to_16_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-6675, %ax # imm = 0xE5ED
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1>, <16 x i8> %splat, <16 x i8> zeroinitializer
+ ret <16 x i8> %blend
+}
+; Merge-masked i8 broadcast to 16 lanes (pattern 1): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <16 x i8> @test_masked_i8_to_16_mask1(i8 %s, <16 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_16_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-5042, %ax # imm = 0xEC4E
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1>, <16 x i8> %splat, <16 x i8> %default
+ ret <16 x i8> %blend
+}
+
+; Zero-masked i8 broadcast to 16 lanes (pattern 1): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <16 x i8> @test_masked_z_i8_to_16_mask1(i8 %s) {
+; CHECK-LABEL: test_masked_z_i8_to_16_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-5042, %ax # imm = 0xEC4E
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1>, <16 x i8> %splat, <16 x i8> zeroinitializer
+ ret <16 x i8> %blend
+}
+; Merge-masked i8 broadcast to 16 lanes (pattern 2): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <16 x i8> @test_masked_i8_to_16_mask2(i8 %s, <16 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_16_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-30108, %ax # imm = 0x8A64
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1>, <16 x i8> %splat, <16 x i8> %default
+ ret <16 x i8> %blend
+}
+
+; Zero-masked i8 broadcast to 16 lanes (pattern 2): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <16 x i8> @test_masked_z_i8_to_16_mask2(i8 %s) {
+; CHECK-LABEL: test_masked_z_i8_to_16_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-30108, %ax # imm = 0x8A64
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1>, <16 x i8> %splat, <16 x i8> zeroinitializer
+ ret <16 x i8> %blend
+}
+; Merge-masked i8 broadcast to 16 lanes (pattern 3): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <16 x i8> @test_masked_i8_to_16_mask3(i8 %s, <16 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_16_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $25644, %ax # imm = 0x642C
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0>, <16 x i8> %splat, <16 x i8> %default
+ ret <16 x i8> %blend
+}
+
+; Zero-masked i8 broadcast to 16 lanes (pattern 3): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <16 x i8> @test_masked_z_i8_to_16_mask3(i8 %s) {
+; CHECK-LABEL: test_masked_z_i8_to_16_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $25644, %ax # imm = 0x642C
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0>, <16 x i8> %splat, <16 x i8> zeroinitializer
+ ret <16 x i8> %blend
+}
+; Plain broadcast: replicate the scalar i8 %s into every lane of the <32 x i8> result.
+define <32 x i8> @test_i8_to_32(i8 %s) {
+; CHECK-LABEL: test_i8_to_32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastb %edi, %ymm0
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <32 x i32> zeroinitializer
+ ret <32 x i8> %splat
+}
+; Merge-masked i8 broadcast to 32 lanes (pattern 0): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <32 x i8> @test_masked_i8_to_32_mask0(i8 %s, <32 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_32_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-1265798160, %eax # imm = 0xB48D73F0
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>, <32 x i8> %splat, <32 x i8> %default
+ ret <32 x i8> %blend
+}
+
+; Zero-masked i8 broadcast to 32 lanes (pattern 0): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <32 x i8> @test_masked_z_i8_to_32_mask0(i8 %s) {
+; CHECK-LABEL: test_masked_z_i8_to_32_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-1265798160, %eax # imm = 0xB48D73F0
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>, <32 x i8> %splat, <32 x i8> zeroinitializer
+ ret <32 x i8> %blend
+}
+; Merge-masked i8 broadcast to 32 lanes (pattern 1): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <32 x i8> @test_masked_i8_to_32_mask1(i8 %s, <32 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_32_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $1183839537, %eax # imm = 0x468FF531
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0>, <32 x i8> %splat, <32 x i8> %default
+ ret <32 x i8> %blend
+}
+
+; Zero-masked i8 broadcast to 32 lanes (pattern 1): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <32 x i8> @test_masked_z_i8_to_32_mask1(i8 %s) {
+; CHECK-LABEL: test_masked_z_i8_to_32_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $1183839537, %eax # imm = 0x468FF531
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0>, <32 x i8> %splat, <32 x i8> zeroinitializer
+ ret <32 x i8> %blend
+}
+; Merge-masked i8 broadcast to 32 lanes (pattern 2): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <32 x i8> @test_masked_i8_to_32_mask2(i8 %s, <32 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_32_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-809048538, %eax # imm = 0xCFC6E626
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1>, <32 x i8> %splat, <32 x i8> %default
+ ret <32 x i8> %blend
+}
+
+; Zero-masked i8 broadcast to 32 lanes (pattern 2): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <32 x i8> @test_masked_z_i8_to_32_mask2(i8 %s) {
+; CHECK-LABEL: test_masked_z_i8_to_32_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-809048538, %eax # imm = 0xCFC6E626
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1>, <32 x i8> %splat, <32 x i8> zeroinitializer
+ ret <32 x i8> %blend
+}
+; Merge-masked i8 broadcast to 32 lanes (pattern 3): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <32 x i8> @test_masked_i8_to_32_mask3(i8 %s, <32 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_32_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-646452858, %eax # imm = 0xD977E986
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1>, <32 x i8> %splat, <32 x i8> %default
+ ret <32 x i8> %blend
+}
+
+; Zero-masked i8 broadcast to 32 lanes (pattern 3): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <32 x i8> @test_masked_z_i8_to_32_mask3(i8 %s) {
+; CHECK-LABEL: test_masked_z_i8_to_32_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-646452858, %eax # imm = 0xD977E986
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1>, <32 x i8> %splat, <32 x i8> zeroinitializer
+ ret <32 x i8> %blend
+}
+; Plain broadcast: replicate the scalar i8 %s into every lane of the <64 x i8> result.
+define <64 x i8> @test_i8_to_64(i8 %s) {
+; CHECK-LABEL: test_i8_to_64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastb %edi, %zmm0
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <64 x i32> zeroinitializer
+ ret <64 x i8> %splat
+}
+; Merge-masked i8 broadcast to 64 lanes (pattern 0): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <64 x i8> @test_masked_i8_to_64_mask0(i8 %s, <64 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_64_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $4127638692029284353, %rax # imm = 0x394851856F904001
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <64 x i32> zeroinitializer
+ %blend = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0>, <64 x i8> %splat, <64 x i8> %default
+ ret <64 x i8> %blend
+}
+
+; Zero-masked i8 broadcast to 64 lanes (pattern 0): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <64 x i8> @test_masked_z_i8_to_64_mask0(i8 %s) {
+; CHECK-LABEL: test_masked_z_i8_to_64_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $4127638692029284353, %rax # imm = 0x394851856F904001
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <64 x i32> zeroinitializer
+ %blend = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0>, <64 x i8> %splat, <64 x i8> zeroinitializer
+ ret <64 x i8> %blend
+}
+; Merge-masked i8 broadcast to 64 lanes (pattern 1): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <64 x i8> @test_masked_i8_to_64_mask1(i8 %s, <64 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_64_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $2719977871742575617, %rax # imm = 0x25BF4D769A23A401
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <64 x i32> zeroinitializer
+ %blend = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <64 x i8> %splat, <64 x i8> %default
+ ret <64 x i8> %blend
+}
+
+; Zero-masked i8 broadcast to 64 lanes (pattern 1): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <64 x i8> @test_masked_z_i8_to_64_mask1(i8 %s) {
+; CHECK-LABEL: test_masked_z_i8_to_64_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $2719977871742575617, %rax # imm = 0x25BF4D769A23A401
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <64 x i32> zeroinitializer
+ %blend = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <64 x i8> %splat, <64 x i8> zeroinitializer
+ ret <64 x i8> %blend
+}
+; Merge-masked i8 broadcast to 64 lanes (pattern 2): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <64 x i8> @test_masked_i8_to_64_mask2(i8 %s, <64 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_64_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $4380017386678030849, %rax # imm = 0x3CC8F29B5AFA9201
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <64 x i32> zeroinitializer
+ %blend = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0>, <64 x i8> %splat, <64 x i8> %default
+ ret <64 x i8> %blend
+}
+
+; Zero-masked i8 broadcast to 64 lanes (pattern 2): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <64 x i8> @test_masked_z_i8_to_64_mask2(i8 %s) {
+; CHECK-LABEL: test_masked_z_i8_to_64_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $4380017386678030849, %rax # imm = 0x3CC8F29B5AFA9201
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <64 x i32> zeroinitializer
+ %blend = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0>, <64 x i8> %splat, <64 x i8> zeroinitializer
+ ret <64 x i8> %blend
+}
+; Merge-masked i8 broadcast to 64 lanes (pattern 3): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <64 x i8> @test_masked_i8_to_64_mask3(i8 %s, <64 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_64_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $2673371376007625217, %rax # imm = 0x2519B91A33A1BA01
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <64 x i32> zeroinitializer
+ %blend = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <64 x i8> %splat, <64 x i8> %default
+ ret <64 x i8> %blend
+}
+
+; Zero-masked i8 broadcast to 64 lanes (pattern 3): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <64 x i8> @test_masked_z_i8_to_64_mask3(i8 %s) {
+; CHECK-LABEL: test_masked_z_i8_to_64_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $2673371376007625217, %rax # imm = 0x2519B91A33A1BA01
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i8> undef, i8 %s, i32 0
+ %splat = shufflevector <2 x i8> %ins, <2 x i8> undef, <64 x i32> zeroinitializer
+ %blend = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <64 x i8> %splat, <64 x i8> zeroinitializer
+ ret <64 x i8> %blend
+}
+; Plain broadcast: replicate the scalar i16 %s into every lane of the <8 x i16> result.
+define <8 x i16> @test_i16_to_8(i16 %s) {
+; CHECK-LABEL: test_i16_to_8:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastw %edi, %xmm0
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i16> undef, i16 %s, i32 0
+ %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %splat
+}
+; Merge-masked i16 broadcast to 8 lanes (pattern 0): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <8 x i16> @test_masked_i16_to_8_mask0(i16 %s, <8 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_8_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $115, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i16> undef, i16 %s, i32 0
+ %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0>, <8 x i16> %splat, <8 x i16> %default
+ ret <8 x i16> %blend
+}
+
+; Zero-masked i16 broadcast to 8 lanes (pattern 0): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <8 x i16> @test_masked_z_i16_to_8_mask0(i16 %s) {
+; CHECK-LABEL: test_masked_z_i16_to_8_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $115, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i16> undef, i16 %s, i32 0
+ %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0>, <8 x i16> %splat, <8 x i16> zeroinitializer
+ ret <8 x i16> %blend
+}
+; Merge-masked i16 broadcast to 8 lanes (pattern 1): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <8 x i16> @test_masked_i16_to_8_mask1(i16 %s, <8 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_8_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-88, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i16> undef, i16 %s, i32 0
+ %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1>, <8 x i16> %splat, <8 x i16> %default
+ ret <8 x i16> %blend
+}
+
+; Zero-masked i16 broadcast to 8 lanes (pattern 1): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <8 x i16> @test_masked_z_i16_to_8_mask1(i16 %s) {
+; CHECK-LABEL: test_masked_z_i16_to_8_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-88, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i16> undef, i16 %s, i32 0
+ %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1>, <8 x i16> %splat, <8 x i16> zeroinitializer
+ ret <8 x i16> %blend
+}
+; Merge-masked i16 broadcast to 8 lanes (pattern 2): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <8 x i16> @test_masked_i16_to_8_mask2(i16 %s, <8 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_8_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-2, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i16> undef, i16 %s, i32 0
+ %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i16> %splat, <8 x i16> %default
+ ret <8 x i16> %blend
+}
+
+; Zero-masked i16 broadcast to 8 lanes (pattern 2): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <8 x i16> @test_masked_z_i16_to_8_mask2(i16 %s) {
+; CHECK-LABEL: test_masked_z_i16_to_8_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-2, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i16> undef, i16 %s, i32 0
+ %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i16> %splat, <8 x i16> zeroinitializer
+ ret <8 x i16> %blend
+}
+; Merge-masked i16 broadcast to 8 lanes (pattern 3): lanes whose select bit is 1 get %s, the rest pass through %default.
+define <8 x i16> @test_masked_i16_to_8_mask3(i16 %s, <8 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_8_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-23, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i16> undef, i16 %s, i32 0
+ %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1>, <8 x i16> %splat, <8 x i16> %default
+ ret <8 x i16> %blend
+}
+
+; Zero-masked i16 broadcast to 8 lanes (pattern 3): lanes whose select bit is 1 get %s, the rest are zeroed.
+define <8 x i16> @test_masked_z_i16_to_8_mask3(i16 %s) {
+; CHECK-LABEL: test_masked_z_i16_to_8_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-23, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %ins = insertelement <2 x i16> undef, i16 %s, i32 0
+ %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1>, <8 x i16> %splat, <8 x i16> zeroinitializer
+ ret <8 x i16> %blend
+}
+; Unmasked splat: i16 %s is inserted into lane 0 and shuffled to all 16 lanes of <16 x i16>.
+; Expected lowering is a single vpbroadcastw from %edi into %ymm0.
+define <16 x i16> @test_i16_to_16(i16 %s) {
+; CHECK-LABEL: test_i16_to_16:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastw %edi, %ymm0
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <16 x i16> %res
+}
+; Merge-masked splat of i16 %s into <16 x i16>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0xCB16 moved into %k1.
+define <16 x i16> @test_masked_i16_to_16_mask0(i16 %s, <16 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_16_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-13546, %ax # imm = 0xCB16
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1>, <16 x i16> %shuf, <16 x i16> %default
+ ret <16 x i16> %res
+}
+
+; Zero-masked splat of i16 %s into <16 x i16>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0xCB16 moved into %k1.
+define <16 x i16> @test_masked_z_i16_to_16_mask0(i16 %s) {
+; CHECK-LABEL: test_masked_z_i16_to_16_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-13546, %ax # imm = 0xCB16
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1>, <16 x i16> %shuf, <16 x i16> zeroinitializer
+ ret <16 x i16> %res
+}
+; Merge-masked splat of i16 %s into <16 x i16>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0x1517 moved into %k1.
+define <16 x i16> @test_masked_i16_to_16_mask1(i16 %s, <16 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_16_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $5399, %ax # imm = 0x1517
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0>, <16 x i16> %shuf, <16 x i16> %default
+ ret <16 x i16> %res
+}
+
+; Zero-masked splat of i16 %s into <16 x i16>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0x1517 moved into %k1.
+define <16 x i16> @test_masked_z_i16_to_16_mask1(i16 %s) {
+; CHECK-LABEL: test_masked_z_i16_to_16_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $5399, %ax # imm = 0x1517
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0>, <16 x i16> %shuf, <16 x i16> zeroinitializer
+ ret <16 x i16> %res
+}
+; Merge-masked splat of i16 %s into <16 x i16>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0x9CDF moved into %k1.
+define <16 x i16> @test_masked_i16_to_16_mask2(i16 %s, <16 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_16_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-25377, %ax # imm = 0x9CDF
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1>, <16 x i16> %shuf, <16 x i16> %default
+ ret <16 x i16> %res
+}
+
+; Zero-masked splat of i16 %s into <16 x i16>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0x9CDF moved into %k1.
+define <16 x i16> @test_masked_z_i16_to_16_mask2(i16 %s) {
+; CHECK-LABEL: test_masked_z_i16_to_16_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-25377, %ax # imm = 0x9CDF
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1>, <16 x i16> %shuf, <16 x i16> zeroinitializer
+ ret <16 x i16> %res
+}
+; Merge-masked splat of i16 %s into <16 x i16>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0x7C87 moved into %k1.
+define <16 x i16> @test_masked_i16_to_16_mask3(i16 %s, <16 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_16_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $31879, %ax # imm = 0x7C87
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0>, <16 x i16> %shuf, <16 x i16> %default
+ ret <16 x i16> %res
+}
+
+; Zero-masked splat of i16 %s into <16 x i16>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0x7C87 moved into %k1.
+define <16 x i16> @test_masked_z_i16_to_16_mask3(i16 %s) {
+; CHECK-LABEL: test_masked_z_i16_to_16_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $31879, %ax # imm = 0x7C87
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0>, <16 x i16> %shuf, <16 x i16> zeroinitializer
+ ret <16 x i16> %res
+}
+; Unmasked splat: i16 %s is inserted into lane 0 and shuffled to all 32 lanes of <32 x i16>.
+; Expected lowering is a single vpbroadcastw from %edi into %zmm0.
+define <32 x i16> @test_i16_to_32(i16 %s) {
+; CHECK-LABEL: test_i16_to_32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastw %edi, %zmm0
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <32 x i16> %res
+}
+; Merge-masked splat of i16 %s into <32 x i16>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0xA151560A moved into %k1.
+define <32 x i16> @test_masked_i16_to_32_mask0(i16 %s, <32 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_32_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-1588505078, %eax # imm = 0xA151560A
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1>, <32 x i16> %shuf, <32 x i16> %default
+ ret <32 x i16> %res
+}
+
+; Zero-masked splat of i16 %s into <32 x i16>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0xA151560A moved into %k1.
+define <32 x i16> @test_masked_z_i16_to_32_mask0(i16 %s) {
+; CHECK-LABEL: test_masked_z_i16_to_32_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-1588505078, %eax # imm = 0xA151560A
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1>, <32 x i16> %shuf, <32 x i16> zeroinitializer
+ ret <32 x i16> %res
+}
+; Merge-masked splat of i16 %s into <32 x i16>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0xD8570105 moved into %k1.
+define <32 x i16> @test_masked_i16_to_32_mask1(i16 %s, <32 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_32_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-665386747, %eax # imm = 0xD8570105
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1>, <32 x i16> %shuf, <32 x i16> %default
+ ret <32 x i16> %res
+}
+
+; Zero-masked splat of i16 %s into <32 x i16>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0xD8570105 moved into %k1.
+define <32 x i16> @test_masked_z_i16_to_32_mask1(i16 %s) {
+; CHECK-LABEL: test_masked_z_i16_to_32_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-665386747, %eax # imm = 0xD8570105
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1>, <32 x i16> %shuf, <32 x i16> zeroinitializer
+ ret <32 x i16> %res
+}
+; Merge-masked splat of i16 %s into <32 x i16>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0x3E379521 moved into %k1.
+define <32 x i16> @test_masked_i16_to_32_mask2(i16 %s, <32 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_32_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $1043830049, %eax # imm = 0x3E379521
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0>, <32 x i16> %shuf, <32 x i16> %default
+ ret <32 x i16> %res
+}
+
+; Zero-masked splat of i16 %s into <32 x i16>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0x3E379521 moved into %k1.
+define <32 x i16> @test_masked_z_i16_to_32_mask2(i16 %s) {
+; CHECK-LABEL: test_masked_z_i16_to_32_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $1043830049, %eax # imm = 0x3E379521
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0>, <32 x i16> %shuf, <32 x i16> zeroinitializer
+ ret <32 x i16> %res
+}
+; Merge-masked splat of i16 %s into <32 x i16>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0xBB42E0A8 moved into %k1.
+define <32 x i16> @test_masked_i16_to_32_mask3(i16 %s, <32 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_32_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-1153245016, %eax # imm = 0xBB42E0A8
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1>, <32 x i16> %shuf, <32 x i16> %default
+ ret <32 x i16> %res
+}
+
+; Zero-masked splat of i16 %s into <32 x i16>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0xBB42E0A8 moved into %k1.
+define <32 x i16> @test_masked_z_i16_to_32_mask3(i16 %s) {
+; CHECK-LABEL: test_masked_z_i16_to_32_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-1153245016, %eax # imm = 0xBB42E0A8
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i16> undef, i16 %s, i32 0
+ %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1>, <32 x i16> %shuf, <32 x i16> zeroinitializer
+ ret <32 x i16> %res
+}
+; Unmasked splat: i32 %s is inserted into lane 0 and shuffled to all 4 lanes of <4 x i32>.
+; Expected lowering is a single vpbroadcastd from %edi into %xmm0.
+define <4 x i32> @test_i32_to_4(i32 %s) {
+; CHECK-LABEL: test_i32_to_4:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastd %edi, %xmm0
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ ret <4 x i32> %res
+}
+; Merge-masked splat of i32 %s into <4 x i32>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0x2 (movb $2) moved into %k1.
+define <4 x i32> @test_masked_i32_to_4_mask0(i32 %s, <4 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_4_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $2, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 0, i1 1, i1 0, i1 0>, <4 x i32> %shuf, <4 x i32> %default
+ ret <4 x i32> %res
+}
+
+; Zero-masked splat of i32 %s into <4 x i32>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0x2 (movb $2) moved into %k1.
+define <4 x i32> @test_masked_z_i32_to_4_mask0(i32 %s) {
+; CHECK-LABEL: test_masked_z_i32_to_4_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $2, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 0, i1 1, i1 0, i1 0>, <4 x i32> %shuf, <4 x i32> zeroinitializer
+ ret <4 x i32> %res
+}
+; Merge-masked splat of i32 %s into <4 x i32>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0x3 (movb $3) moved into %k1.
+define <4 x i32> @test_masked_i32_to_4_mask1(i32 %s, <4 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_4_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $3, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 1, i1 1, i1 0, i1 0>, <4 x i32> %shuf, <4 x i32> %default
+ ret <4 x i32> %res
+}
+
+; Zero-masked splat of i32 %s into <4 x i32>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0x3 (movb $3) moved into %k1.
+define <4 x i32> @test_masked_z_i32_to_4_mask1(i32 %s) {
+; CHECK-LABEL: test_masked_z_i32_to_4_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $3, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 1, i1 1, i1 0, i1 0>, <4 x i32> %shuf, <4 x i32> zeroinitializer
+ ret <4 x i32> %res
+}
+; Merge-masked splat of i32 %s into <4 x i32>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0xA (movb $10) moved into %k1.
+define <4 x i32> @test_masked_i32_to_4_mask2(i32 %s, <4 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_4_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $10, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 0, i1 1, i1 0, i1 1>, <4 x i32> %shuf, <4 x i32> %default
+ ret <4 x i32> %res
+}
+
+; Zero-masked splat of i32 %s into <4 x i32>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0xA (movb $10) moved into %k1.
+define <4 x i32> @test_masked_z_i32_to_4_mask2(i32 %s) {
+; CHECK-LABEL: test_masked_z_i32_to_4_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $10, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 0, i1 1, i1 0, i1 1>, <4 x i32> %shuf, <4 x i32> zeroinitializer
+ ret <4 x i32> %res
+}
+; Merge-masked splat of i32 %s into <4 x i32>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0x4 (movb $4) moved into %k1.
+define <4 x i32> @test_masked_i32_to_4_mask3(i32 %s, <4 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_4_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $4, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 0, i1 0, i1 1, i1 0>, <4 x i32> %shuf, <4 x i32> %default
+ ret <4 x i32> %res
+}
+
+; Zero-masked splat of i32 %s into <4 x i32>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0x4 (movb $4) moved into %k1.
+define <4 x i32> @test_masked_z_i32_to_4_mask3(i32 %s) {
+; CHECK-LABEL: test_masked_z_i32_to_4_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $4, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 0, i1 0, i1 1, i1 0>, <4 x i32> %shuf, <4 x i32> zeroinitializer
+ ret <4 x i32> %res
+}
+; Unmasked splat: i32 %s is inserted into lane 0 and shuffled to all 8 lanes of <8 x i32>.
+; Expected lowering is a single vpbroadcastd from %edi into %ymm0.
+define <8 x i32> @test_i32_to_8(i32 %s) {
+; CHECK-LABEL: test_i32_to_8:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastd %edi, %ymm0
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <8 x i32> %res
+}
+; Merge-masked splat of i32 %s into <8 x i32>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0xD0 (movb $-48) moved into %k1.
+define <8 x i32> @test_masked_i32_to_8_mask0(i32 %s, <8 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_8_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-48, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1>, <8 x i32> %shuf, <8 x i32> %default
+ ret <8 x i32> %res
+}
+
+; Zero-masked splat of i32 %s into <8 x i32>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0xD0 (movb $-48) moved into %k1.
+define <8 x i32> @test_masked_z_i32_to_8_mask0(i32 %s) {
+; CHECK-LABEL: test_masked_z_i32_to_8_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-48, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1>, <8 x i32> %shuf, <8 x i32> zeroinitializer
+ ret <8 x i32> %res
+}
+; Merge-masked splat of i32 %s into <8 x i32>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0x1A (movb $26) moved into %k1.
+define <8 x i32> @test_masked_i32_to_8_mask1(i32 %s, <8 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_8_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $26, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0>, <8 x i32> %shuf, <8 x i32> %default
+ ret <8 x i32> %res
+}
+
+; Zero-masked splat of i32 %s into <8 x i32>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0x1A (movb $26) moved into %k1.
+define <8 x i32> @test_masked_z_i32_to_8_mask1(i32 %s) {
+; CHECK-LABEL: test_masked_z_i32_to_8_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $26, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0>, <8 x i32> %shuf, <8 x i32> zeroinitializer
+ ret <8 x i32> %res
+}
+; Merge-masked splat of i32 %s into <8 x i32>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0x26 (movb $38) moved into %k1.
+define <8 x i32> @test_masked_i32_to_8_mask2(i32 %s, <8 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_8_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $38, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <8 x i32> %shuf, <8 x i32> %default
+ ret <8 x i32> %res
+}
+
+; Zero-masked splat of i32 %s into <8 x i32>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0x26 (movb $38) moved into %k1.
+define <8 x i32> @test_masked_z_i32_to_8_mask2(i32 %s) {
+; CHECK-LABEL: test_masked_z_i32_to_8_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $38, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <8 x i32> %shuf, <8 x i32> zeroinitializer
+ ret <8 x i32> %res
+}
+; Merge-masked splat of i32 %s into <8 x i32>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0xB2 (movb $-78) moved into %k1.
+define <8 x i32> @test_masked_i32_to_8_mask3(i32 %s, <8 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_8_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-78, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1>, <8 x i32> %shuf, <8 x i32> %default
+ ret <8 x i32> %res
+}
+
+; Zero-masked splat of i32 %s into <8 x i32>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0xB2 (movb $-78) moved into %k1.
+define <8 x i32> @test_masked_z_i32_to_8_mask3(i32 %s) {
+; CHECK-LABEL: test_masked_z_i32_to_8_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-78, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1>, <8 x i32> %shuf, <8 x i32> zeroinitializer
+ ret <8 x i32> %res
+}
+; Unmasked splat: i32 %s is inserted into lane 0 and shuffled to all 16 lanes of <16 x i32>.
+; Expected lowering is a single vpbroadcastd from %edi into %zmm0.
+define <16 x i32> @test_i32_to_16(i32 %s) {
+; CHECK-LABEL: test_i32_to_16:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastd %edi, %zmm0
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <16 x i32> %res
+}
+; Merge-masked splat of i32 %s into <16 x i32>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0xB95 moved into %k1.
+define <16 x i32> @test_masked_i32_to_16_mask0(i32 %s, <16 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_16_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $2965, %ax # imm = 0xB95
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0>, <16 x i32> %shuf, <16 x i32> %default
+ ret <16 x i32> %res
+}
+
+; Zero-masked splat of i32 %s into <16 x i32>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0xB95 moved into %k1.
+define <16 x i32> @test_masked_z_i32_to_16_mask0(i32 %s) {
+; CHECK-LABEL: test_masked_z_i32_to_16_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $2965, %ax # imm = 0xB95
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0>, <16 x i32> %shuf, <16 x i32> zeroinitializer
+ ret <16 x i32> %res
+}
+; Merge-masked splat of i32 %s into <16 x i32>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0x92E8 moved into %k1.
+define <16 x i32> @test_masked_i32_to_16_mask1(i32 %s, <16 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_16_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-27928, %ax # imm = 0x92E8
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1>, <16 x i32> %shuf, <16 x i32> %default
+ ret <16 x i32> %res
+}
+
+; Zero-masked splat of i32 %s into <16 x i32>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0x92E8 moved into %k1.
+define <16 x i32> @test_masked_z_i32_to_16_mask1(i32 %s) {
+; CHECK-LABEL: test_masked_z_i32_to_16_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-27928, %ax # imm = 0x92E8
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1>, <16 x i32> %shuf, <16 x i32> zeroinitializer
+ ret <16 x i32> %res
+}
+; Merge-masked splat of i32 %s into <16 x i32>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0xE8F5 moved into %k1.
+define <16 x i32> @test_masked_i32_to_16_mask2(i32 %s, <16 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_16_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-5899, %ax # imm = 0xE8F5
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1>, <16 x i32> %shuf, <16 x i32> %default
+ ret <16 x i32> %res
+}
+
+; Zero-masked splat of i32 %s into <16 x i32>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0xE8F5 moved into %k1.
+define <16 x i32> @test_masked_z_i32_to_16_mask2(i32 %s) {
+; CHECK-LABEL: test_masked_z_i32_to_16_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-5899, %ax # imm = 0xE8F5
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1>, <16 x i32> %shuf, <16 x i32> zeroinitializer
+ ret <16 x i32> %res
+}
+; Merge-masked splat of i32 %s into <16 x i32>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0x88C1 moved into %k1.
+define <16 x i32> @test_masked_i32_to_16_mask3(i32 %s, <16 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_16_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-30527, %ax # imm = 0x88C1
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1>, <16 x i32> %shuf, <16 x i32> %default
+ ret <16 x i32> %res
+}
+
+; Zero-masked splat of i32 %s into <16 x i32>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0x88C1 moved into %k1.
+define <16 x i32> @test_masked_z_i32_to_16_mask3(i32 %s) {
+; CHECK-LABEL: test_masked_z_i32_to_16_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-30527, %ax # imm = 0x88C1
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i32> undef, i32 %s, i32 0
+ %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1>, <16 x i32> %shuf, <16 x i32> zeroinitializer
+ ret <16 x i32> %res
+}
+; Unmasked splat: i64 %s is inserted into lane 0 and shuffled to both lanes of <2 x i64>.
+; Expected lowering is a single vpbroadcastq from %rdi into %xmm0.
+define <2 x i64> @test_i64_to_2(i64 %s) {
+; CHECK-LABEL: test_i64_to_2:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastq %rdi, %xmm0
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x i64> %res
+}
+; Merge-masked splat of i64 %s into <2 x i64>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0x2 (movb $2) moved into %k1.
+define <2 x i64> @test_masked_i64_to_2_mask0(i64 %s, <2 x i64> %default) {
+; CHECK-LABEL: test_masked_i64_to_2_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $2, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %res = select <2 x i1> <i1 0, i1 1>, <2 x i64> %shuf, <2 x i64> %default
+ ret <2 x i64> %res
+}
+
+; Zero-masked splat of i64 %s into <2 x i64>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0x2 (movb $2) moved into %k1.
+define <2 x i64> @test_masked_z_i64_to_2_mask0(i64 %s) {
+; CHECK-LABEL: test_masked_z_i64_to_2_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $2, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %res = select <2 x i1> <i1 0, i1 1>, <2 x i64> %shuf, <2 x i64> zeroinitializer
+ ret <2 x i64> %res
+}
+; Merge-masked splat of i64 %s into <2 x i64>: select-true lanes get %s, the rest keep %default.
+; Select lane i is mask bit i, so the constant is 0x1 (movb $1) moved into %k1.
+define <2 x i64> @test_masked_i64_to_2_mask1(i64 %s, <2 x i64> %default) {
+; CHECK-LABEL: test_masked_i64_to_2_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %res = select <2 x i1> <i1 1, i1 0>, <2 x i64> %shuf, <2 x i64> %default
+ ret <2 x i64> %res
+}
+
+; Zero-masked splat of i64 %s into <2 x i64>: select-true lanes get %s, the rest are zeroed ({z}).
+; Select lane i is mask bit i, so the constant is 0x1 (movb $1) moved into %k1.
+define <2 x i64> @test_masked_z_i64_to_2_mask1(i64 %s) {
+; CHECK-LABEL: test_masked_z_i64_to_2_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %res = select <2 x i1> <i1 1, i1 0>, <2 x i64> %shuf, <2 x i64> zeroinitializer
+ ret <2 x i64> %res
+}
+; Unmasked splat: i64 %s is inserted into lane 0 and shuffled to all 4 lanes of <4 x i64>.
+; Expected lowering is a single vpbroadcastq from %rdi into %ymm0.
+define <4 x i64> @test_i64_to_4(i64 %s) {
+; CHECK-LABEL: test_i64_to_4:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm0
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ ret <4 x i64> %res
+}
+; Merging broadcast of %s to <4 x i64>: lanes 0 and 3 take %s, lanes 1-2 keep %default (expected mask immediate 9).
+define <4 x i64> @test_masked_i64_to_4_mask0(i64 %s, <4 x i64> %default) {
+; CHECK-LABEL: test_masked_i64_to_4_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $9, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x i64> %shuf, <4 x i64> %default
+ ret <4 x i64> %res
+}
+
+; Zeroing broadcast of %s to <4 x i64>: lanes 0 and 3 take %s, lanes 1-2 are zero (expected mask immediate 9).
+define <4 x i64> @test_masked_z_i64_to_4_mask0(i64 %s) {
+; CHECK-LABEL: test_masked_z_i64_to_4_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $9, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x i64> %shuf, <4 x i64> zeroinitializer
+ ret <4 x i64> %res
+}
+; Merging broadcast of %s to <4 x i64>: only lane 3 takes %s, lanes 0-2 keep %default (expected mask immediate 8).
+define <4 x i64> @test_masked_i64_to_4_mask1(i64 %s, <4 x i64> %default) {
+; CHECK-LABEL: test_masked_i64_to_4_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $8, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 0, i1 0, i1 0, i1 1>, <4 x i64> %shuf, <4 x i64> %default
+ ret <4 x i64> %res
+}
+
+; Zeroing broadcast of %s to <4 x i64>: only lane 3 takes %s, lanes 0-2 are zero (expected mask immediate 8).
+define <4 x i64> @test_masked_z_i64_to_4_mask1(i64 %s) {
+; CHECK-LABEL: test_masked_z_i64_to_4_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $8, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 0, i1 0, i1 0, i1 1>, <4 x i64> %shuf, <4 x i64> zeroinitializer
+ ret <4 x i64> %res
+}
+; Merging broadcast of %s to <4 x i64>: lanes 0-2 take %s, lane 3 keeps %default (expected mask immediate 7).
+define <4 x i64> @test_masked_i64_to_4_mask2(i64 %s, <4 x i64> %default) {
+; CHECK-LABEL: test_masked_i64_to_4_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $7, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x i64> %shuf, <4 x i64> %default
+ ret <4 x i64> %res
+}
+
+; Zeroing broadcast of %s to <4 x i64>: lanes 0-2 take %s, lane 3 is zero (expected mask immediate 7).
+define <4 x i64> @test_masked_z_i64_to_4_mask2(i64 %s) {
+; CHECK-LABEL: test_masked_z_i64_to_4_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $7, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x i64> %shuf, <4 x i64> zeroinitializer
+ ret <4 x i64> %res
+}
+; Merging broadcast of %s to <4 x i64>: lanes 0 and 2 take %s, lanes 1 and 3 keep %default (expected mask immediate 5).
+define <4 x i64> @test_masked_i64_to_4_mask3(i64 %s, <4 x i64> %default) {
+; CHECK-LABEL: test_masked_i64_to_4_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $5, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i64> %shuf, <4 x i64> %default
+ ret <4 x i64> %res
+}
+
+; Zeroing broadcast of %s to <4 x i64>: lanes 0 and 2 take %s, lanes 1 and 3 are zero (expected mask immediate 5).
+define <4 x i64> @test_masked_z_i64_to_4_mask3(i64 %s) {
+; CHECK-LABEL: test_masked_z_i64_to_4_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $5, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %res = select <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i64> %shuf, <4 x i64> zeroinitializer
+ ret <4 x i64> %res
+}
+; Unmasked broadcast of %s to all 8 lanes of <8 x i64>; expected to lower to one vpbroadcastq from %rdi into %zmm0.
+define <8 x i64> @test_i64_to_8(i64 %s) {
+; CHECK-LABEL: test_i64_to_8:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <8 x i64> %res
+}
+; Merging broadcast of %s to <8 x i64>: lanes 0-4 and 6 take %s, lanes 5 and 7 keep %default (expected mask immediate 95 = 0x5F).
+define <8 x i64> @test_masked_i64_to_8_mask0(i64 %s, <8 x i64> %default) {
+; CHECK-LABEL: test_masked_i64_to_8_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $95, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0>, <8 x i64> %shuf, <8 x i64> %default
+ ret <8 x i64> %res
+}
+
+; Zeroing broadcast of %s to <8 x i64>: lanes 0-4 and 6 take %s, lanes 5 and 7 are zero (expected mask immediate 95 = 0x5F).
+define <8 x i64> @test_masked_z_i64_to_8_mask0(i64 %s) {
+; CHECK-LABEL: test_masked_z_i64_to_8_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $95, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0>, <8 x i64> %shuf, <8 x i64> zeroinitializer
+ ret <8 x i64> %res
+}
+; Merging broadcast of %s to <8 x i64>: lanes 1 and 3-7 take %s, lanes 0 and 2 keep %default (mask byte 0xFA, printed as signed -6).
+define <8 x i64> @test_masked_i64_to_8_mask1(i64 %s, <8 x i64> %default) {
+; CHECK-LABEL: test_masked_i64_to_8_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-6, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i64> %shuf, <8 x i64> %default
+ ret <8 x i64> %res
+}
+
+; Zeroing broadcast of %s to <8 x i64>: lanes 1 and 3-7 take %s, lanes 0 and 2 are zero (mask byte 0xFA, printed as signed -6).
+define <8 x i64> @test_masked_z_i64_to_8_mask1(i64 %s) {
+; CHECK-LABEL: test_masked_z_i64_to_8_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-6, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i64> %shuf, <8 x i64> zeroinitializer
+ ret <8 x i64> %res
+}
+; Merging broadcast of %s to <8 x i64>: lanes 1 and 3 take %s, all other lanes keep %default (expected mask immediate 10).
+define <8 x i64> @test_masked_i64_to_8_mask2(i64 %s, <8 x i64> %default) {
+; CHECK-LABEL: test_masked_i64_to_8_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $10, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0>, <8 x i64> %shuf, <8 x i64> %default
+ ret <8 x i64> %res
+}
+
+; Zeroing broadcast of %s to <8 x i64>: lanes 1 and 3 take %s, all other lanes are zero (expected mask immediate 10).
+define <8 x i64> @test_masked_z_i64_to_8_mask2(i64 %s) {
+; CHECK-LABEL: test_masked_z_i64_to_8_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $10, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0>, <8 x i64> %shuf, <8 x i64> zeroinitializer
+ ret <8 x i64> %res
+}
+; Merging broadcast of %s to <8 x i64>: lanes 1 and 2 take %s, all other lanes keep %default (expected mask immediate 6).
+define <8 x i64> @test_masked_i64_to_8_mask3(i64 %s, <8 x i64> %default) {
+; CHECK-LABEL: test_masked_i64_to_8_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $6, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x i64> %shuf, <8 x i64> %default
+ ret <8 x i64> %res
+}
+
+; Zeroing broadcast of %s to <8 x i64>: lanes 1 and 2 take %s, all other lanes are zero (expected mask immediate 6).
+define <8 x i64> @test_masked_z_i64_to_8_mask3(i64 %s) {
+; CHECK-LABEL: test_masked_z_i64_to_8_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $6, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %vec = insertelement <2 x i64> undef, i64 %s, i32 0
+ %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x i64> %shuf, <8 x i64> zeroinitializer
+ ret <8 x i64> %res
+}
+; Unmasked broadcast of the i8 loaded from %p to all 16 lanes; the load is expected to fold into vpbroadcastb as a (%rdi) memory operand.
+define <16 x i8> @test_i8_to_16_mem(i8* %p) {
+; CHECK-LABEL: test_i8_to_16_mem:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <16 x i8> %res
+}
+; Merging broadcast of the i8 loaded from %p to <16 x i8>: lane i takes the byte when bit i of 0xF605 is set, otherwise keeps %default.
+define <16 x i8> @test_masked_i8_to_16_mem_mask0(i8* %p, <16 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_16_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-2555, %ax # imm = 0xF605
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1>, <16 x i8> %shuf, <16 x i8> %default
+ ret <16 x i8> %res
+}
+
+; Zeroing broadcast of the i8 loaded from %p to <16 x i8>: lane i takes the byte when bit i of 0xF605 is set, otherwise is zero.
+define <16 x i8> @test_masked_z_i8_to_16_mem_mask0(i8* %p) {
+; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-2555, %ax # imm = 0xF605
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1>, <16 x i8> %shuf, <16 x i8> zeroinitializer
+ ret <16 x i8> %res
+}
+; Merging broadcast of the i8 loaded from %p to <16 x i8>: lane i takes the byte when bit i of 0x2F5A is set, otherwise keeps %default.
+define <16 x i8> @test_masked_i8_to_16_mem_mask1(i8* %p, <16 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_16_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $12122, %ax # imm = 0x2F5A
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0>, <16 x i8> %shuf, <16 x i8> %default
+ ret <16 x i8> %res
+}
+
+; Zeroing broadcast of the i8 loaded from %p to <16 x i8>: lane i takes the byte when bit i of 0x2F5A is set, otherwise is zero.
+define <16 x i8> @test_masked_z_i8_to_16_mem_mask1(i8* %p) {
+; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $12122, %ax # imm = 0x2F5A
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0>, <16 x i8> %shuf, <16 x i8> zeroinitializer
+ ret <16 x i8> %res
+}
+; Merging broadcast of the i8 loaded from %p to <16 x i8>: lane i takes the byte when bit i of 0x1FB8 is set, otherwise keeps %default.
+define <16 x i8> @test_masked_i8_to_16_mem_mask2(i8* %p, <16 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_16_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $8120, %ax # imm = 0x1FB8
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0>, <16 x i8> %shuf, <16 x i8> %default
+ ret <16 x i8> %res
+}
+
+; Zeroing broadcast of the i8 loaded from %p to <16 x i8>: lane i takes the byte when bit i of 0x1FB8 is set, otherwise is zero.
+define <16 x i8> @test_masked_z_i8_to_16_mem_mask2(i8* %p) {
+; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $8120, %ax # imm = 0x1FB8
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0>, <16 x i8> %shuf, <16 x i8> zeroinitializer
+ ret <16 x i8> %res
+}
+; Merging broadcast of the i8 loaded from %p to <16 x i8>: lane i takes the byte when bit i of 0x35E8 is set, otherwise keeps %default.
+define <16 x i8> @test_masked_i8_to_16_mem_mask3(i8* %p, <16 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_16_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $13800, %ax # imm = 0x35E8
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0>, <16 x i8> %shuf, <16 x i8> %default
+ ret <16 x i8> %res
+}
+
+; Zeroing broadcast of the i8 loaded from %p to <16 x i8>: lane i takes the byte when bit i of 0x35E8 is set, otherwise is zero.
+define <16 x i8> @test_masked_z_i8_to_16_mem_mask3(i8* %p) {
+; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $13800, %ax # imm = 0x35E8
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0>, <16 x i8> %shuf, <16 x i8> zeroinitializer
+ ret <16 x i8> %res
+}
+; Unmasked broadcast of the i8 loaded from %p to all 32 lanes; the load is expected to fold into vpbroadcastb writing %ymm0.
+define <32 x i8> @test_i8_to_32_mem(i8* %p) {
+; CHECK-LABEL: test_i8_to_32_mem:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <32 x i8> %res
+}
+; Merging broadcast of the i8 loaded from %p to <32 x i8>: lane i takes the byte when bit i of 0xCCFDBA81 is set, otherwise keeps %default.
+define <32 x i8> @test_masked_i8_to_32_mem_mask0(i8* %p, <32 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_32_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-855786879, %eax # imm = 0xCCFDBA81
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1>, <32 x i8> %shuf, <32 x i8> %default
+ ret <32 x i8> %res
+}
+
+; Zeroing broadcast of the i8 loaded from %p to <32 x i8>: lane i takes the byte when bit i of 0xCCFDBA81 is set, otherwise is zero.
+define <32 x i8> @test_masked_z_i8_to_32_mem_mask0(i8* %p) {
+; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-855786879, %eax # imm = 0xCCFDBA81
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1>, <32 x i8> %shuf, <32 x i8> zeroinitializer
+ ret <32 x i8> %res
+}
+; Merging broadcast of the i8 loaded from %p to <32 x i8>: lane i takes the byte when bit i of 0xEFDD35F4 is set, otherwise keeps %default.
+define <32 x i8> @test_masked_i8_to_32_mem_mask1(i8* %p, <32 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_32_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-270715404, %eax # imm = 0xEFDD35F4
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1>, <32 x i8> %shuf, <32 x i8> %default
+ ret <32 x i8> %res
+}
+
+; Zeroing broadcast of the i8 loaded from %p to <32 x i8>: lane i takes the byte when bit i of 0xEFDD35F4 is set, otherwise is zero.
+define <32 x i8> @test_masked_z_i8_to_32_mem_mask1(i8* %p) {
+; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-270715404, %eax # imm = 0xEFDD35F4
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1>, <32 x i8> %shuf, <32 x i8> zeroinitializer
+ ret <32 x i8> %res
+}
+; Merging broadcast of the i8 loaded from %p to <32 x i8>: lane i takes the byte when bit i of 0x5D51432 is set, otherwise keeps %default.
+define <32 x i8> @test_masked_i8_to_32_mem_mask2(i8* %p, <32 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_32_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $97850418, %eax # imm = 0x5D51432
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <32 x i8> %shuf, <32 x i8> %default
+ ret <32 x i8> %res
+}
+
+; Zeroing broadcast of the i8 loaded from %p to <32 x i8>: lane i takes the byte when bit i of 0x5D51432 is set, otherwise is zero.
+define <32 x i8> @test_masked_z_i8_to_32_mem_mask2(i8* %p) {
+; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $97850418, %eax # imm = 0x5D51432
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <32 x i8> %shuf, <32 x i8> zeroinitializer
+ ret <32 x i8> %res
+}
+; Merging broadcast of the i8 loaded from %p to <32 x i8>: lane i takes the byte when bit i of 0x6CF55B14 is set, otherwise keeps %default.
+define <32 x i8> @test_masked_i8_to_32_mem_mask3(i8* %p, <32 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_32_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $1828018964, %eax # imm = 0x6CF55B14
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0>, <32 x i8> %shuf, <32 x i8> %default
+ ret <32 x i8> %res
+}
+
+; Zeroing broadcast of the i8 loaded from %p to <32 x i8>: lane i takes the byte when bit i of 0x6CF55B14 is set, otherwise is zero.
+define <32 x i8> @test_masked_z_i8_to_32_mem_mask3(i8* %p) {
+; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $1828018964, %eax # imm = 0x6CF55B14
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <32 x i1> <i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0>, <32 x i8> %shuf, <32 x i8> zeroinitializer
+ ret <32 x i8> %res
+}
+define <64 x i8> @test_i8_to_64_mem(i8* %p) {
+; CHECK-LABEL: test_i8_to_64_mem:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <64 x i8> %res
+}
+define <64 x i8> @test_masked_i8_to_64_mem_mask0(i8* %p, <64 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_64_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $468087142555171329, %rax # imm = 0x67EFAC6AFEDBA01
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> %default
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_masked_z_i8_to_64_mem_mask0(i8* %p) {
+; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $468087142555171329, %rax # imm = 0x67EFAC6AFEDBA01
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> zeroinitializer
+ ret <64 x i8> %res
+}
+define <64 x i8> @test_masked_i8_to_64_mem_mask1(i8* %p, <64 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_64_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $1198668921668790785, %rax # imm = 0x10A287088F5E6A01
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> %default
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_masked_z_i8_to_64_mem_mask1(i8* %p) {
+; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $1198668921668790785, %rax # imm = 0x10A287088F5E6A01
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> zeroinitializer
+ ret <64 x i8> %res
+}
+define <64 x i8> @test_masked_i8_to_64_mem_mask2(i8* %p, <64 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_64_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $1018395262988968961, %rax # imm = 0xE2211189365E401
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %s = load i8, i8* %p
+ %vec = insertelement <2 x i8> undef, i8 %s, i32 0
+ %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> %default
+ ret <64 x i8> %res
+}
+
+; Zero-masked splat of the i8 at %p into <64 x i8>: lanes whose select bit is 0 become zero.
+define <64 x i8> @test_masked_z_i8_to_64_mem_mask2(i8* %p) {
+; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $1018395262988968961, %rax # imm = 0xE2211189365E401
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i8, i8* %p
+ %seed = insertelement <2 x i8> undef, i8 %scalar, i32 0
+ %splat = shufflevector <2 x i8> %seed, <2 x i8> undef, <64 x i32> zeroinitializer
+ %blend = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0>, <64 x i8> %splat, <64 x i8> zeroinitializer
+ ret <64 x i8> %blend
+}
+; Merge-masked splat of the i8 at %p into <64 x i8>: lanes whose select bit is 0 keep %default.
+define <64 x i8> @test_masked_i8_to_64_mem_mask3(i8* %p, <64 x i8> %default) {
+; CHECK-LABEL: test_masked_i8_to_64_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $2983418297125630465, %rax # imm = 0x29673B226892F201
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i8, i8* %p
+ %seed = insertelement <2 x i8> undef, i8 %scalar, i32 0
+ %splat = shufflevector <2 x i8> %seed, <2 x i8> undef, <64 x i32> zeroinitializer
+ %blend = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0>, <64 x i8> %splat, <64 x i8> %default
+ ret <64 x i8> %blend
+}
+
+; Zero-masked splat of the i8 at %p into <64 x i8>: lanes whose select bit is 0 become zero.
+define <64 x i8> @test_masked_z_i8_to_64_mem_mask3(i8* %p) {
+; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movabsq $2983418297125630465, %rax # imm = 0x29673B226892F201
+; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i8, i8* %p
+ %seed = insertelement <2 x i8> undef, i8 %scalar, i32 0
+ %splat = shufflevector <2 x i8> %seed, <2 x i8> undef, <64 x i32> zeroinitializer
+ %blend = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0>, <64 x i8> %splat, <64 x i8> zeroinitializer
+ ret <64 x i8> %blend
+}
+; Splat the i16 loaded from %p into every lane of <8 x i16>.
+define <8 x i16> @test_i16_to_8_mem(i16* %p) {
+; CHECK-LABEL: test_i16_to_8_mem:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %splat
+}
+; Merge-masked splat of the i16 at %p into <8 x i16>: lanes whose select bit is 0 keep %default.
+define <8 x i16> @test_masked_i16_to_8_mem_mask0(i16* %p, <8 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_8_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $89, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0>, <8 x i16> %splat, <8 x i16> %default
+ ret <8 x i16> %blend
+}
+
+; Zero-masked splat of the i16 at %p into <8 x i16>: lanes whose select bit is 0 become zero.
+define <8 x i16> @test_masked_z_i16_to_8_mem_mask0(i16* %p) {
+; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $89, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0>, <8 x i16> %splat, <8 x i16> zeroinitializer
+ ret <8 x i16> %blend
+}
+; Merge-masked splat of the i16 at %p into <8 x i16>: lanes whose select bit is 0 keep %default.
+define <8 x i16> @test_masked_i16_to_8_mem_mask1(i16* %p, <8 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_8_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $46, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0>, <8 x i16> %splat, <8 x i16> %default
+ ret <8 x i16> %blend
+}
+
+; Zero-masked splat of the i16 at %p into <8 x i16>: lanes whose select bit is 0 become zero.
+define <8 x i16> @test_masked_z_i16_to_8_mem_mask1(i16* %p) {
+; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $46, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0>, <8 x i16> %splat, <8 x i16> zeroinitializer
+ ret <8 x i16> %blend
+}
+; Merge-masked splat of the i16 at %p into <8 x i16>: lanes whose select bit is 0 keep %default.
+define <8 x i16> @test_masked_i16_to_8_mem_mask2(i16* %p, <8 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_8_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-128, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x i16> %splat, <8 x i16> %default
+ ret <8 x i16> %blend
+}
+
+; Zero-masked splat of the i16 at %p into <8 x i16>: lanes whose select bit is 0 become zero.
+define <8 x i16> @test_masked_z_i16_to_8_mem_mask2(i16* %p) {
+; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-128, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x i16> %splat, <8 x i16> zeroinitializer
+ ret <8 x i16> %blend
+}
+; Merge-masked splat of the i16 at %p into <8 x i16>: lanes whose select bit is 0 keep %default.
+define <8 x i16> @test_masked_i16_to_8_mem_mask3(i16* %p, <8 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_8_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $11, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0>, <8 x i16> %splat, <8 x i16> %default
+ ret <8 x i16> %blend
+}
+
+; Zero-masked splat of the i16 at %p into <8 x i16>: lanes whose select bit is 0 become zero.
+define <8 x i16> @test_masked_z_i16_to_8_mem_mask3(i16* %p) {
+; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $11, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0>, <8 x i16> %splat, <8 x i16> zeroinitializer
+ ret <8 x i16> %blend
+}
+; Splat the i16 loaded from %p into every lane of <16 x i16>.
+define <16 x i16> @test_i16_to_16_mem(i16* %p) {
+; CHECK-LABEL: test_i16_to_16_mem:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <16 x i32> zeroinitializer
+ ret <16 x i16> %splat
+}
+; Merge-masked splat of the i16 at %p into <16 x i16>: lanes whose select bit is 0 keep %default.
+define <16 x i16> @test_masked_i16_to_16_mem_mask0(i16* %p, <16 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_16_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $6614, %ax # imm = 0x19D6
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0>, <16 x i16> %splat, <16 x i16> %default
+ ret <16 x i16> %blend
+}
+
+; Zero-masked splat of the i16 at %p into <16 x i16>: lanes whose select bit is 0 become zero.
+define <16 x i16> @test_masked_z_i16_to_16_mem_mask0(i16* %p) {
+; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $6614, %ax # imm = 0x19D6
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0>, <16 x i16> %splat, <16 x i16> zeroinitializer
+ ret <16 x i16> %blend
+}
+; Merge-masked splat of the i16 at %p into <16 x i16>: lanes whose select bit is 0 keep %default.
+define <16 x i16> @test_masked_i16_to_16_mem_mask1(i16* %p, <16 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_16_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-772, %ax # imm = 0xFCFC
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <16 x i16> %splat, <16 x i16> %default
+ ret <16 x i16> %blend
+}
+
+; Zero-masked splat of the i16 at %p into <16 x i16>: lanes whose select bit is 0 become zero.
+define <16 x i16> @test_masked_z_i16_to_16_mem_mask1(i16* %p) {
+; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $-772, %ax # imm = 0xFCFC
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <16 x i16> %splat, <16 x i16> zeroinitializer
+ ret <16 x i16> %blend
+}
+; Merge-masked splat of the i16 at %p into <16 x i16>: lanes whose select bit is 0 keep %default.
+define <16 x i16> @test_masked_i16_to_16_mem_mask2(i16* %p, <16 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_16_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $13065, %ax # imm = 0x3309
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <16 x i16> %splat, <16 x i16> %default
+ ret <16 x i16> %blend
+}
+
+; Zero-masked splat of the i16 at %p into <16 x i16>: lanes whose select bit is 0 become zero.
+define <16 x i16> @test_masked_z_i16_to_16_mem_mask2(i16* %p) {
+; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $13065, %ax # imm = 0x3309
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <16 x i16> %splat, <16 x i16> zeroinitializer
+ ret <16 x i16> %blend
+}
+; Merge-masked splat of the i16 at %p into <16 x i16>: lanes whose select bit is 0 keep %default.
+define <16 x i16> @test_masked_i16_to_16_mem_mask3(i16* %p, <16 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_16_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $23498, %ax # imm = 0x5BCA
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0>, <16 x i16> %splat, <16 x i16> %default
+ ret <16 x i16> %blend
+}
+
+; Zero-masked splat of the i16 at %p into <16 x i16>: lanes whose select bit is 0 become zero.
+define <16 x i16> @test_masked_z_i16_to_16_mem_mask3(i16* %p) {
+; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $23498, %ax # imm = 0x5BCA
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <16 x i32> zeroinitializer
+ %blend = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0>, <16 x i16> %splat, <16 x i16> zeroinitializer
+ ret <16 x i16> %blend
+}
+; Splat the i16 loaded from %p into every lane of <32 x i16>.
+define <32 x i16> @test_i16_to_32_mem(i16* %p) {
+; CHECK-LABEL: test_i16_to_32_mem:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <32 x i32> zeroinitializer
+ ret <32 x i16> %splat
+}
+; Merge-masked splat of the i16 at %p into <32 x i16>: lanes whose select bit is 0 keep %default.
+define <32 x i16> @test_masked_i16_to_32_mem_mask0(i16* %p, <32 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_32_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-1152776498, %eax # imm = 0xBB4A06CE
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1>, <32 x i16> %splat, <32 x i16> %default
+ ret <32 x i16> %blend
+}
+
+; Zero-masked splat of the i16 at %p into <32 x i16>: lanes whose select bit is 0 become zero.
+define <32 x i16> @test_masked_z_i16_to_32_mem_mask0(i16* %p) {
+; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-1152776498, %eax # imm = 0xBB4A06CE
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1>, <32 x i16> %splat, <32 x i16> zeroinitializer
+ ret <32 x i16> %blend
+}
+; Merge-masked splat of the i16 at %p into <32 x i16>: lanes whose select bit is 0 keep %default.
+define <32 x i16> @test_masked_i16_to_32_mem_mask1(i16* %p, <32 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_32_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-694382116, %eax # imm = 0xD69C91DC
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1>, <32 x i16> %splat, <32 x i16> %default
+ ret <32 x i16> %blend
+}
+
+; Zero-masked splat of the i16 at %p into <32 x i16>: lanes whose select bit is 0 become zero.
+define <32 x i16> @test_masked_z_i16_to_32_mem_mask1(i16* %p) {
+; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-694382116, %eax # imm = 0xD69C91DC
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1>, <32 x i16> %splat, <32 x i16> zeroinitializer
+ ret <32 x i16> %blend
+}
+; Merge-masked splat of the i16 at %p into <32 x i16>: lanes whose select bit is 0 keep %default.
+define <32 x i16> @test_masked_i16_to_32_mem_mask2(i16* %p, <32 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_32_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-350116879, %eax # imm = 0xEB21A3F1
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1>, <32 x i16> %splat, <32 x i16> %default
+ ret <32 x i16> %blend
+}
+
+; Zero-masked splat of the i16 at %p into <32 x i16>: lanes whose select bit is 0 become zero.
+define <32 x i16> @test_masked_z_i16_to_32_mem_mask2(i16* %p) {
+; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $-350116879, %eax # imm = 0xEB21A3F1
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1>, <32 x i16> %splat, <32 x i16> zeroinitializer
+ ret <32 x i16> %blend
+}
+; Merge-masked splat of the i16 at %p into <32 x i16>: lanes whose select bit is 0 keep %default.
+define <32 x i16> @test_masked_i16_to_32_mem_mask3(i16* %p, <32 x i16> %default) {
+; CHECK-LABEL: test_masked_i16_to_32_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $727673142, %eax # imm = 0x2B5F6936
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0>, <32 x i16> %splat, <32 x i16> %default
+ ret <32 x i16> %blend
+}
+
+; Zero-masked splat of the i16 at %p into <32 x i16>: lanes whose select bit is 0 become zero.
+define <32 x i16> @test_masked_z_i16_to_32_mem_mask3(i16* %p) {
+; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $727673142, %eax # imm = 0x2B5F6936
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i16, i16* %p
+ %seed = insertelement <2 x i16> undef, i16 %scalar, i32 0
+ %splat = shufflevector <2 x i16> %seed, <2 x i16> undef, <32 x i32> zeroinitializer
+ %blend = select <32 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0>, <32 x i16> %splat, <32 x i16> zeroinitializer
+ ret <32 x i16> %blend
+}
+; Splat the i32 loaded from %p into every lane of <4 x i32>. The expected asm uses vbroadcastss rather than vpbroadcastd.
+define <4 x i32> @test_i32_to_4_mem(i32* %p) {
+; CHECK-LABEL: test_i32_to_4_mem:
+; CHECK: # BB#0:
+; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %splat
+}
+; Merge-masked splat of the i32 at %p into <4 x i32>: lanes whose select bit is 0 keep %default.
+define <4 x i32> @test_masked_i32_to_4_mem_mask0(i32* %p, <4 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_4_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $10, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <4 x i32> zeroinitializer
+ %blend = select <4 x i1> <i1 0, i1 1, i1 0, i1 1>, <4 x i32> %splat, <4 x i32> %default
+ ret <4 x i32> %blend
+}
+
+; Zero-masked splat of the i32 at %p into <4 x i32>: lanes whose select bit is 0 become zero.
+define <4 x i32> @test_masked_z_i32_to_4_mem_mask0(i32* %p) {
+; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $10, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <4 x i32> zeroinitializer
+ %blend = select <4 x i1> <i1 0, i1 1, i1 0, i1 1>, <4 x i32> %splat, <4 x i32> zeroinitializer
+ ret <4 x i32> %blend
+}
+; Merge-masked splat of the i32 at %p into <4 x i32>: lanes whose select bit is 0 keep %default.
+define <4 x i32> @test_masked_i32_to_4_mem_mask1(i32* %p, <4 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_4_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $13, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <4 x i32> zeroinitializer
+ %blend = select <4 x i1> <i1 1, i1 0, i1 1, i1 1>, <4 x i32> %splat, <4 x i32> %default
+ ret <4 x i32> %blend
+}
+
+; Zero-masked splat of the i32 at %p into <4 x i32>: lanes whose select bit is 0 become zero.
+define <4 x i32> @test_masked_z_i32_to_4_mem_mask1(i32* %p) {
+; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $13, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <4 x i32> zeroinitializer
+ %blend = select <4 x i1> <i1 1, i1 0, i1 1, i1 1>, <4 x i32> %splat, <4 x i32> zeroinitializer
+ ret <4 x i32> %blend
+}
+; Merge-masked splat of the i32 at %p into <4 x i32>: lanes whose select bit is 0 keep %default.
+define <4 x i32> @test_masked_i32_to_4_mem_mask2(i32* %p, <4 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_4_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <4 x i32> zeroinitializer
+ %blend = select <4 x i1> <i1 1, i1 0, i1 0, i1 0>, <4 x i32> %splat, <4 x i32> %default
+ ret <4 x i32> %blend
+}
+
+; Zero-masked splat of the i32 at %p into <4 x i32>: lanes whose select bit is 0 become zero.
+define <4 x i32> @test_masked_z_i32_to_4_mem_mask2(i32* %p) {
+; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <4 x i32> zeroinitializer
+ %blend = select <4 x i1> <i1 1, i1 0, i1 0, i1 0>, <4 x i32> %splat, <4 x i32> zeroinitializer
+ ret <4 x i32> %blend
+}
+; Merge-masked splat of the i32 at %p into <4 x i32>: lanes whose select bit is 0 keep %default.
+define <4 x i32> @test_masked_i32_to_4_mem_mask3(i32* %p, <4 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_4_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $8, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <4 x i32> zeroinitializer
+ %blend = select <4 x i1> <i1 0, i1 0, i1 0, i1 1>, <4 x i32> %splat, <4 x i32> %default
+ ret <4 x i32> %blend
+}
+
+; Zero-masked splat of the i32 at %p into <4 x i32>: lanes whose select bit is 0 become zero.
+define <4 x i32> @test_masked_z_i32_to_4_mem_mask3(i32* %p) {
+; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $8, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <4 x i32> zeroinitializer
+ %blend = select <4 x i1> <i1 0, i1 0, i1 0, i1 1>, <4 x i32> %splat, <4 x i32> zeroinitializer
+ ret <4 x i32> %blend
+}
+; Splat the i32 loaded from %p into every lane of <8 x i32>. The expected asm uses vbroadcastss rather than vpbroadcastd.
+define <8 x i32> @test_i32_to_8_mem(i32* %p) {
+; CHECK-LABEL: test_i32_to_8_mem:
+; CHECK: # BB#0:
+; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <8 x i32> zeroinitializer
+ ret <8 x i32> %splat
+}
+; Merge-masked splat of the i32 at %p into <8 x i32>: lanes whose select bit is 0 keep %default.
+define <8 x i32> @test_masked_i32_to_8_mem_mask0(i32* %p, <8 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_8_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-41, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1>, <8 x i32> %splat, <8 x i32> %default
+ ret <8 x i32> %blend
+}
+
+; Zero-masked splat of the i32 at %p into <8 x i32>: lanes whose select bit is 0 become zero.
+define <8 x i32> @test_masked_z_i32_to_8_mem_mask0(i32* %p) {
+; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-41, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1>, <8 x i32> %splat, <8 x i32> zeroinitializer
+ ret <8 x i32> %blend
+}
+; Merge-masked splat of the i32 at %p into <8 x i32>: lanes whose select bit is 0 keep %default.
+define <8 x i32> @test_masked_i32_to_8_mem_mask1(i32* %p, <8 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_8_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $87, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, <8 x i32> %splat, <8 x i32> %default
+ ret <8 x i32> %blend
+}
+
+; Zero-masked splat of the i32 at %p into <8 x i32>: lanes whose select bit is 0 become zero.
+define <8 x i32> @test_masked_z_i32_to_8_mem_mask1(i32* %p) {
+; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $87, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, <8 x i32> %splat, <8 x i32> zeroinitializer
+ ret <8 x i32> %blend
+}
+; Merge-masked splat of the i32 at %p into <8 x i32>: lanes whose select bit is 0 keep %default.
+define <8 x i32> @test_masked_i32_to_8_mem_mask2(i32* %p, <8 x i32> %default) {
+; CHECK-LABEL: test_masked_i32_to_8_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $64, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0>, <8 x i32> %splat, <8 x i32> %default
+ ret <8 x i32> %blend
+}
+
+; Zero-masked splat of the i32 at %p into <8 x i32>: lanes whose select bit is 0 become zero.
+define <8 x i32> @test_masked_z_i32_to_8_mem_mask2(i32* %p) {
+; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $64, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %scalar = load i32, i32* %p
+ %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <8 x i32> zeroinitializer
+ %blend = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0>, <8 x i32> %splat, <8 x i32> zeroinitializer
+ ret <8 x i32> %blend
+}
+define <8 x i32> @test_masked_i32_to_8_mem_mask3(i32* %p, <8 x i32> %default) { ; merge-masked splat of the loaded i32 into %default; mask 0x98 (-104 as a signed byte) selects lanes 3,4,7
+; CHECK-LABEL: test_masked_i32_to_8_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-104, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i32, i32* %p
+ %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <8 x i32> zeroinitializer
+ %out = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true>, <8 x i32> %splat, <8 x i32> %default
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_masked_z_i32_to_8_mem_mask3(i32* %p) { ; zeroing-masked splat of the loaded i32; mask 0x98 (-104 as a signed byte) selects lanes 3,4,7
+; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-104, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i32, i32* %p
+ %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <8 x i32> zeroinitializer
+ %out = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true>, <8 x i32> %splat, <8 x i32> zeroinitializer
+ ret <8 x i32> %out
+}
+define <16 x i32> @test_i32_to_16_mem(i32* %p) { ; unmasked splat of the loaded i32 to all 16 lanes
+; CHECK-LABEL: test_i32_to_16_mem:
+; CHECK: # BB#0:
+; CHECK-NEXT: vbroadcastss (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %elt = load i32, i32* %p
+ %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> zeroinitializer
+ ret <16 x i32> %splat
+}
+define <16 x i32> @test_masked_i32_to_16_mem_mask0(i32* %p, <16 x i32> %default) { ; merge-masked splat of the loaded i32 into %default; mask 0x713B selects lanes 0,1,3,4,5,8,12,13,14
+; CHECK-LABEL: test_masked_i32_to_16_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $28987, %ax # imm = 0x713B
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i32, i32* %p
+ %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> zeroinitializer
+ %out = select <16 x i1> <i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i32> %splat, <16 x i32> %default
+ ret <16 x i32> %out
+}
+
+define <16 x i32> @test_masked_z_i32_to_16_mem_mask0(i32* %p) { ; zeroing-masked splat of the loaded i32; mask 0x713B selects lanes 0,1,3,4,5,8,12,13,14
+; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $28987, %ax # imm = 0x713B
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i32, i32* %p
+ %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> zeroinitializer
+ %out = select <16 x i1> <i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i32> %splat, <16 x i32> zeroinitializer
+ ret <16 x i32> %out
+}
+define <16 x i32> @test_masked_i32_to_16_mem_mask1(i32* %p, <16 x i32> %default) { ; merge-masked splat of the loaded i32 into %default; mask 0x2CC1 selects lanes 0,6,7,10,11,13
+; CHECK-LABEL: test_masked_i32_to_16_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $11457, %ax # imm = 0x2CC1
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i32, i32* %p
+ %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> zeroinitializer
+ %out = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false>, <16 x i32> %splat, <16 x i32> %default
+ ret <16 x i32> %out
+}
+
+define <16 x i32> @test_masked_z_i32_to_16_mem_mask1(i32* %p) { ; zeroing-masked splat of the loaded i32; mask 0x2CC1 selects lanes 0,6,7,10,11,13
+; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $11457, %ax # imm = 0x2CC1
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i32, i32* %p
+ %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> zeroinitializer
+ %out = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false>, <16 x i32> %splat, <16 x i32> zeroinitializer
+ ret <16 x i32> %out
+}
+define <16 x i32> @test_masked_i32_to_16_mem_mask2(i32* %p, <16 x i32> %default) { ; merge-masked splat of the loaded i32 into %default; mask 0x78BC selects lanes 2,3,4,5,7,11,12,13,14
+; CHECK-LABEL: test_masked_i32_to_16_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $30908, %ax # imm = 0x78BC
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i32, i32* %p
+ %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> zeroinitializer
+ %out = select <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false>, <16 x i32> %splat, <16 x i32> %default
+ ret <16 x i32> %out
+}
+
+define <16 x i32> @test_masked_z_i32_to_16_mem_mask2(i32* %p) { ; zeroing-masked splat of the loaded i32; mask 0x78BC selects lanes 2,3,4,5,7,11,12,13,14
+; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $30908, %ax # imm = 0x78BC
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i32, i32* %p
+ %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> zeroinitializer
+ %out = select <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false>, <16 x i32> %splat, <16 x i32> zeroinitializer
+ ret <16 x i32> %out
+}
+define <16 x i32> @test_masked_i32_to_16_mem_mask3(i32* %p, <16 x i32> %default) { ; merge-masked splat of the loaded i32 into %default; mask 0x68EF selects lanes 0,1,2,3,5,6,7,11,13,14
+; CHECK-LABEL: test_masked_i32_to_16_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $26863, %ax # imm = 0x68EF
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i32, i32* %p
+ %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> zeroinitializer
+ %out = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>, <16 x i32> %splat, <16 x i32> %default
+ ret <16 x i32> %out
+}
+
+define <16 x i32> @test_masked_z_i32_to_16_mem_mask3(i32* %p) { ; zeroing-masked splat of the loaded i32; mask 0x68EF selects lanes 0,1,2,3,5,6,7,11,13,14
+; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movw $26863, %ax # imm = 0x68EF
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i32, i32* %p
+ %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> zeroinitializer
+ %out = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>, <16 x i32> %splat, <16 x i32> zeroinitializer
+ ret <16 x i32> %out
+}
+define <2 x i64> @test_i64_to_2_mem(i64* %p) { ; unmasked splat of the loaded i64 to both lanes
+; CHECK-LABEL: test_i64_to_2_mem:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %splat
+}
+define <2 x i64> @test_masked_i64_to_2_mem_mask0(i64* %p, <2 x i64> %default) { ; merge-masked splat of the loaded i64 into %default; mask 2 selects lane 1
+; CHECK-LABEL: test_masked_i64_to_2_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $2, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
+ %out = select <2 x i1> <i1 false, i1 true>, <2 x i64> %splat, <2 x i64> %default
+ ret <2 x i64> %out
+}
+
+define <2 x i64> @test_masked_z_i64_to_2_mem_mask0(i64* %p) { ; zeroing-masked splat of the loaded i64; mask 2 selects lane 1
+; CHECK-LABEL: test_masked_z_i64_to_2_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $2, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
+ %out = select <2 x i1> <i1 false, i1 true>, <2 x i64> %splat, <2 x i64> zeroinitializer
+ ret <2 x i64> %out
+}
+define <2 x i64> @test_masked_i64_to_2_mem_mask1(i64* %p, <2 x i64> %default) { ; merge-masked splat of the loaded i64 into %default; mask 1 selects lane 0
+; CHECK-LABEL: test_masked_i64_to_2_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
+ %out = select <2 x i1> <i1 true, i1 false>, <2 x i64> %splat, <2 x i64> %default
+ ret <2 x i64> %out
+}
+
+define <2 x i64> @test_masked_z_i64_to_2_mem_mask1(i64* %p) { ; zeroing-masked splat of the loaded i64; mask 1 selects lane 0
+; CHECK-LABEL: test_masked_z_i64_to_2_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
+ %out = select <2 x i1> <i1 true, i1 false>, <2 x i64> %splat, <2 x i64> zeroinitializer
+ ret <2 x i64> %out
+}
+define <4 x i64> @test_i64_to_4_mem(i64* %p) { ; unmasked splat of the loaded i64 to all 4 lanes
+; CHECK-LABEL: test_i64_to_4_mem:
+; CHECK: # BB#0:
+; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <4 x i32> zeroinitializer
+ ret <4 x i64> %splat
+}
+define <4 x i64> @test_masked_i64_to_4_mem_mask0(i64* %p, <4 x i64> %default) { ; merge-masked splat of the loaded i64 into %default; mask 5 selects lanes 0,2
+; CHECK-LABEL: test_masked_i64_to_4_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $5, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <4 x i32> zeroinitializer
+ %out = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i64> %splat, <4 x i64> %default
+ ret <4 x i64> %out
+}
+
+define <4 x i64> @test_masked_z_i64_to_4_mem_mask0(i64* %p) { ; zeroing-masked splat of the loaded i64; mask 5 selects lanes 0,2
+; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $5, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <4 x i32> zeroinitializer
+ %out = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i64> %splat, <4 x i64> zeroinitializer
+ ret <4 x i64> %out
+}
+define <4 x i64> @test_masked_i64_to_4_mem_mask1(i64* %p, <4 x i64> %default) { ; merge-masked splat of the loaded i64 into %default; mask 14 (0x0E) selects lanes 1,2,3
+; CHECK-LABEL: test_masked_i64_to_4_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $14, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <4 x i32> zeroinitializer
+ %out = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i64> %splat, <4 x i64> %default
+ ret <4 x i64> %out
+}
+
+define <4 x i64> @test_masked_z_i64_to_4_mem_mask1(i64* %p) { ; zeroing-masked splat of the loaded i64; mask 14 (0x0E) selects lanes 1,2,3
+; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $14, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <4 x i32> zeroinitializer
+ %out = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i64> %splat, <4 x i64> zeroinitializer
+ ret <4 x i64> %out
+}
+define <4 x i64> @test_masked_i64_to_4_mem_mask2(i64* %p, <4 x i64> %default) { ; merge-masked splat of the loaded i64 into %default; mask 11 (0x0B) selects lanes 0,1,3
+; CHECK-LABEL: test_masked_i64_to_4_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $11, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <4 x i32> zeroinitializer
+ %out = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i64> %splat, <4 x i64> %default
+ ret <4 x i64> %out
+}
+
+define <4 x i64> @test_masked_z_i64_to_4_mem_mask2(i64* %p) { ; zeroing-masked splat of the loaded i64; mask 11 (0x0B) selects lanes 0,1,3
+; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $11, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <4 x i32> zeroinitializer
+ %out = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i64> %splat, <4 x i64> zeroinitializer
+ ret <4 x i64> %out
+}
+define <4 x i64> @test_masked_i64_to_4_mem_mask3(i64* %p, <4 x i64> %default) { ; merge-masked splat of the loaded i64 into %default; mask 3 selects lanes 0,1
+; CHECK-LABEL: test_masked_i64_to_4_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $3, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <4 x i32> zeroinitializer
+ %out = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i64> %splat, <4 x i64> %default
+ ret <4 x i64> %out
+}
+
+define <4 x i64> @test_masked_z_i64_to_4_mem_mask3(i64* %p) { ; zeroing-masked splat of the loaded i64; mask 3 selects lanes 0,1
+; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $3, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <4 x i32> zeroinitializer
+ %out = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i64> %splat, <4 x i64> zeroinitializer
+ ret <4 x i64> %out
+}
+define <8 x i64> @test_i64_to_8_mem(i64* %p) { ; unmasked splat of the loaded i64 to all 8 lanes
+; CHECK-LABEL: test_i64_to_8_mem:
+; CHECK: # BB#0:
+; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <8 x i32> zeroinitializer
+ ret <8 x i64> %splat
+}
+define <8 x i64> @test_masked_i64_to_8_mem_mask0(i64* %p, <8 x i64> %default) { ; merge-masked splat of the loaded i64 into %default; mask 0x8F (-113 as a signed byte) selects lanes 0,1,2,3,7
+; CHECK-LABEL: test_masked_i64_to_8_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-113, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <8 x i32> zeroinitializer
+ %out = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x i64> %splat, <8 x i64> %default
+ ret <8 x i64> %out
+}
+
+define <8 x i64> @test_masked_z_i64_to_8_mem_mask0(i64* %p) { ; zeroing-masked splat of the loaded i64; mask 0x8F (-113 as a signed byte) selects lanes 0,1,2,3,7
+; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask0:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-113, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <8 x i32> zeroinitializer
+ %out = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x i64> %splat, <8 x i64> zeroinitializer
+ ret <8 x i64> %out
+}
+define <8 x i64> @test_masked_i64_to_8_mem_mask1(i64* %p, <8 x i64> %default) { ; merge-masked splat of the loaded i64 into %default; mask 4 selects lane 2
+; CHECK-LABEL: test_masked_i64_to_8_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $4, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <8 x i32> zeroinitializer
+ %out = select <8 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i64> %splat, <8 x i64> %default
+ ret <8 x i64> %out
+}
+
+define <8 x i64> @test_masked_z_i64_to_8_mem_mask1(i64* %p) { ; zeroing-masked splat of the loaded i64; mask 4 selects lane 2
+; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $4, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <8 x i32> zeroinitializer
+ %out = select <8 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i64> %splat, <8 x i64> zeroinitializer
+ ret <8 x i64> %out
+}
+define <8 x i64> @test_masked_i64_to_8_mem_mask2(i64* %p, <8 x i64> %default) { ; merge-masked splat of the loaded i64 into %default; mask 0xBD (-67 as a signed byte) selects lanes 0,2,3,4,5,7
+; CHECK-LABEL: test_masked_i64_to_8_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-67, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <8 x i32> zeroinitializer
+ %out = select <8 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true>, <8 x i64> %splat, <8 x i64> %default
+ ret <8 x i64> %out
+}
+
+define <8 x i64> @test_masked_z_i64_to_8_mem_mask2(i64* %p) { ; zeroing-masked splat of the loaded i64; mask 0xBD (-67 as a signed byte) selects lanes 0,2,3,4,5,7
+; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $-67, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <8 x i32> zeroinitializer
+ %out = select <8 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true>, <8 x i64> %splat, <8 x i64> zeroinitializer
+ ret <8 x i64> %out
+}
+define <8 x i64> @test_masked_i64_to_8_mem_mask3(i64* %p, <8 x i64> %default) { ; merge-masked splat of the loaded i64 into %default; mask 86 (0x56) selects lanes 1,2,4,6
+; CHECK-LABEL: test_masked_i64_to_8_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $86, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <8 x i32> zeroinitializer
+ %out = select <8 x i1> <i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i64> %splat, <8 x i64> %default
+ ret <8 x i64> %out
+}
+
+define <8 x i64> @test_masked_z_i64_to_8_mem_mask3(i64* %p) { ; zeroing-masked splat of the loaded i64; mask 86 (0x56) selects lanes 1,2,4,6
+; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb $86, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %elt = load i64, i64* %p
+ %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <8 x i32> zeroinitializer
+ %out = select <8 x i1> <i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i64> %splat, <8 x i64> zeroinitializer
+ ret <8 x i64> %out
+}
OpenPOWER on IntegriCloud