summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll')
-rw-r--r-- llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll 1656
1 files changed, 920 insertions, 736 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll
index b466a2f9ebc..a8533a6f7a1 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll
@@ -10,104 +10,112 @@ define <16 x i8> @test_i8_to_16(i8 %s) {
%res = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_i8_to_16_mask0(i8 %s, <16 x i8> %default) {
+define <16 x i8> @test_masked_i8_to_16_mask0(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-6675, %ax # imm = 0xE5ED
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1>, <16 x i8> %shuf, <16 x i8> %default
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_z_i8_to_16_mask0(i8 %s) {
+define <16 x i8> @test_masked_z_i8_to_16_mask0(i8 %s, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-6675, %ax # imm = 0xE5ED
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1>, <16 x i8> %shuf, <16 x i8> zeroinitializer
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_i8_to_16_mask1(i8 %s, <16 x i8> %default) {
+define <16 x i8> @test_masked_i8_to_16_mask1(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-5042, %ax # imm = 0xEC4E
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1>, <16 x i8> %shuf, <16 x i8> %default
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_z_i8_to_16_mask1(i8 %s) {
+define <16 x i8> @test_masked_z_i8_to_16_mask1(i8 %s, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-5042, %ax # imm = 0xEC4E
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1>, <16 x i8> %shuf, <16 x i8> zeroinitializer
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_i8_to_16_mask2(i8 %s, <16 x i8> %default) {
+define <16 x i8> @test_masked_i8_to_16_mask2(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-30108, %ax # imm = 0x8A64
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1>, <16 x i8> %shuf, <16 x i8> %default
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_z_i8_to_16_mask2(i8 %s) {
+define <16 x i8> @test_masked_z_i8_to_16_mask2(i8 %s, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-30108, %ax # imm = 0x8A64
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1>, <16 x i8> %shuf, <16 x i8> zeroinitializer
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_i8_to_16_mask3(i8 %s, <16 x i8> %default) {
+define <16 x i8> @test_masked_i8_to_16_mask3(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $25644, %ax # imm = 0x642C
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0>, <16 x i8> %shuf, <16 x i8> %default
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_z_i8_to_16_mask3(i8 %s) {
+define <16 x i8> @test_masked_z_i8_to_16_mask3(i8 %s, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $25644, %ax # imm = 0x642C
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0>, <16 x i8> %shuf, <16 x i8> zeroinitializer
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
ret <16 x i8> %res
}
define <32 x i8> @test_i8_to_32(i8 %s) {
@@ -119,104 +127,112 @@ define <32 x i8> @test_i8_to_32(i8 %s) {
%res = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_i8_to_32_mask0(i8 %s, <32 x i8> %default) {
+define <32 x i8> @test_masked_i8_to_32_mask0(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-1265798160, %eax # imm = 0xB48D73F0
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>, <32 x i8> %shuf, <32 x i8> %default
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_z_i8_to_32_mask0(i8 %s) {
+define <32 x i8> @test_masked_z_i8_to_32_mask0(i8 %s, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-1265798160, %eax # imm = 0xB48D73F0
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>, <32 x i8> %shuf, <32 x i8> zeroinitializer
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_i8_to_32_mask1(i8 %s, <32 x i8> %default) {
+define <32 x i8> @test_masked_i8_to_32_mask1(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $1183839537, %eax # imm = 0x468FF531
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0>, <32 x i8> %shuf, <32 x i8> %default
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_z_i8_to_32_mask1(i8 %s) {
+define <32 x i8> @test_masked_z_i8_to_32_mask1(i8 %s, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $1183839537, %eax # imm = 0x468FF531
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0>, <32 x i8> %shuf, <32 x i8> zeroinitializer
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_i8_to_32_mask2(i8 %s, <32 x i8> %default) {
+define <32 x i8> @test_masked_i8_to_32_mask2(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-809048538, %eax # imm = 0xCFC6E626
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1>, <32 x i8> %shuf, <32 x i8> %default
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_z_i8_to_32_mask2(i8 %s) {
+define <32 x i8> @test_masked_z_i8_to_32_mask2(i8 %s, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-809048538, %eax # imm = 0xCFC6E626
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1>, <32 x i8> %shuf, <32 x i8> zeroinitializer
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_i8_to_32_mask3(i8 %s, <32 x i8> %default) {
+define <32 x i8> @test_masked_i8_to_32_mask3(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-646452858, %eax # imm = 0xD977E986
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1>, <32 x i8> %shuf, <32 x i8> %default
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_z_i8_to_32_mask3(i8 %s) {
+define <32 x i8> @test_masked_z_i8_to_32_mask3(i8 %s, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-646452858, %eax # imm = 0xD977E986
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1>, <32 x i8> %shuf, <32 x i8> zeroinitializer
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
ret <32 x i8> %res
}
define <64 x i8> @test_i8_to_64(i8 %s) {
@@ -228,104 +244,112 @@ define <64 x i8> @test_i8_to_64(i8 %s) {
%res = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_i8_to_64_mask0(i8 %s, <64 x i8> %default) {
+define <64 x i8> @test_masked_i8_to_64_mask0(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $4127638692029284353, %rax # imm = 0x394851856F904001
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> %default
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_z_i8_to_64_mask0(i8 %s) {
+define <64 x i8> @test_masked_z_i8_to_64_mask0(i8 %s, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $4127638692029284353, %rax # imm = 0x394851856F904001
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> zeroinitializer
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_i8_to_64_mask1(i8 %s, <64 x i8> %default) {
+define <64 x i8> @test_masked_i8_to_64_mask1(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $2719977871742575617, %rax # imm = 0x25BF4D769A23A401
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> %default
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_z_i8_to_64_mask1(i8 %s) {
+define <64 x i8> @test_masked_z_i8_to_64_mask1(i8 %s, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $2719977871742575617, %rax # imm = 0x25BF4D769A23A401
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> zeroinitializer
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_i8_to_64_mask2(i8 %s, <64 x i8> %default) {
+define <64 x i8> @test_masked_i8_to_64_mask2(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $4380017386678030849, %rax # imm = 0x3CC8F29B5AFA9201
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> %default
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_z_i8_to_64_mask2(i8 %s) {
+define <64 x i8> @test_masked_z_i8_to_64_mask2(i8 %s, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $4380017386678030849, %rax # imm = 0x3CC8F29B5AFA9201
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> zeroinitializer
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_i8_to_64_mask3(i8 %s, <64 x i8> %default) {
+define <64 x i8> @test_masked_i8_to_64_mask3(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $2673371376007625217, %rax # imm = 0x2519B91A33A1BA01
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> %default
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_z_i8_to_64_mask3(i8 %s) {
+define <64 x i8> @test_masked_z_i8_to_64_mask3(i8 %s, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $2673371376007625217, %rax # imm = 0x2519B91A33A1BA01
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastb %edi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> zeroinitializer
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
ret <64 x i8> %res
}
define <8 x i16> @test_i16_to_8(i16 %s) {
@@ -337,104 +361,112 @@ define <8 x i16> @test_i16_to_8(i16 %s) {
%res = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_i16_to_8_mask0(i16 %s, <8 x i16> %default) {
+define <8 x i16> @test_masked_i16_to_8_mask0(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $115, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0>, <8 x i16> %shuf, <8 x i16> %default
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_z_i16_to_8_mask0(i16 %s) {
+define <8 x i16> @test_masked_z_i16_to_8_mask0(i16 %s, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $115, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0>, <8 x i16> %shuf, <8 x i16> zeroinitializer
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_i16_to_8_mask1(i16 %s, <8 x i16> %default) {
+define <8 x i16> @test_masked_i16_to_8_mask1(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-88, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1>, <8 x i16> %shuf, <8 x i16> %default
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_z_i16_to_8_mask1(i16 %s) {
+define <8 x i16> @test_masked_z_i16_to_8_mask1(i16 %s, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-88, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1>, <8 x i16> %shuf, <8 x i16> zeroinitializer
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_i16_to_8_mask2(i16 %s, <8 x i16> %default) {
+define <8 x i16> @test_masked_i16_to_8_mask2(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-2, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i16> %shuf, <8 x i16> %default
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_z_i16_to_8_mask2(i16 %s) {
+define <8 x i16> @test_masked_z_i16_to_8_mask2(i16 %s, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-2, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i16> %shuf, <8 x i16> zeroinitializer
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_i16_to_8_mask3(i16 %s, <8 x i16> %default) {
+define <8 x i16> @test_masked_i16_to_8_mask3(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-23, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1>, <8 x i16> %shuf, <8 x i16> %default
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_z_i16_to_8_mask3(i16 %s) {
+define <8 x i16> @test_masked_z_i16_to_8_mask3(i16 %s, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-23, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1>, <8 x i16> %shuf, <8 x i16> zeroinitializer
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
ret <8 x i16> %res
}
define <16 x i16> @test_i16_to_16(i16 %s) {
@@ -446,104 +478,112 @@ define <16 x i16> @test_i16_to_16(i16 %s) {
%res = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_i16_to_16_mask0(i16 %s, <16 x i16> %default) {
+define <16 x i16> @test_masked_i16_to_16_mask0(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-13546, %ax # imm = 0xCB16
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1>, <16 x i16> %shuf, <16 x i16> %default
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_z_i16_to_16_mask0(i16 %s) {
+define <16 x i16> @test_masked_z_i16_to_16_mask0(i16 %s, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-13546, %ax # imm = 0xCB16
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1>, <16 x i16> %shuf, <16 x i16> zeroinitializer
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_i16_to_16_mask1(i16 %s, <16 x i16> %default) {
+define <16 x i16> @test_masked_i16_to_16_mask1(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $5399, %ax # imm = 0x1517
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0>, <16 x i16> %shuf, <16 x i16> %default
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_z_i16_to_16_mask1(i16 %s) {
+define <16 x i16> @test_masked_z_i16_to_16_mask1(i16 %s, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $5399, %ax # imm = 0x1517
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0>, <16 x i16> %shuf, <16 x i16> zeroinitializer
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_i16_to_16_mask2(i16 %s, <16 x i16> %default) {
+define <16 x i16> @test_masked_i16_to_16_mask2(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-25377, %ax # imm = 0x9CDF
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1>, <16 x i16> %shuf, <16 x i16> %default
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_z_i16_to_16_mask2(i16 %s) {
+define <16 x i16> @test_masked_z_i16_to_16_mask2(i16 %s, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-25377, %ax # imm = 0x9CDF
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1>, <16 x i16> %shuf, <16 x i16> zeroinitializer
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_i16_to_16_mask3(i16 %s, <16 x i16> %default) {
+define <16 x i16> @test_masked_i16_to_16_mask3(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $31879, %ax # imm = 0x7C87
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0>, <16 x i16> %shuf, <16 x i16> %default
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_z_i16_to_16_mask3(i16 %s) {
+define <16 x i16> @test_masked_z_i16_to_16_mask3(i16 %s, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $31879, %ax # imm = 0x7C87
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0>, <16 x i16> %shuf, <16 x i16> zeroinitializer
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
define <32 x i16> @test_i16_to_32(i16 %s) {
@@ -555,104 +595,112 @@ define <32 x i16> @test_i16_to_32(i16 %s) {
%res = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_i16_to_32_mask0(i16 %s, <32 x i16> %default) {
+define <32 x i16> @test_masked_i16_to_32_mask0(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-1588505078, %eax # imm = 0xA151560A
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1>, <32 x i16> %shuf, <32 x i16> %default
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_z_i16_to_32_mask0(i16 %s) {
+define <32 x i16> @test_masked_z_i16_to_32_mask0(i16 %s, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-1588505078, %eax # imm = 0xA151560A
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1>, <32 x i16> %shuf, <32 x i16> zeroinitializer
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_i16_to_32_mask1(i16 %s, <32 x i16> %default) {
+define <32 x i16> @test_masked_i16_to_32_mask1(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-665386747, %eax # imm = 0xD8570105
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1>, <32 x i16> %shuf, <32 x i16> %default
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_z_i16_to_32_mask1(i16 %s) {
+define <32 x i16> @test_masked_z_i16_to_32_mask1(i16 %s, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-665386747, %eax # imm = 0xD8570105
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1>, <32 x i16> %shuf, <32 x i16> zeroinitializer
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_i16_to_32_mask2(i16 %s, <32 x i16> %default) {
+define <32 x i16> @test_masked_i16_to_32_mask2(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $1043830049, %eax # imm = 0x3E379521
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0>, <32 x i16> %shuf, <32 x i16> %default
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_z_i16_to_32_mask2(i16 %s) {
+define <32 x i16> @test_masked_z_i16_to_32_mask2(i16 %s, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $1043830049, %eax # imm = 0x3E379521
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0>, <32 x i16> %shuf, <32 x i16> zeroinitializer
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_i16_to_32_mask3(i16 %s, <32 x i16> %default) {
+define <32 x i16> @test_masked_i16_to_32_mask3(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-1153245016, %eax # imm = 0xBB42E0A8
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1>, <32 x i16> %shuf, <32 x i16> %default
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_z_i16_to_32_mask3(i16 %s) {
+define <32 x i16> @test_masked_z_i16_to_32_mask3(i16 %s, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-1153245016, %eax # imm = 0xBB42E0A8
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastw %edi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1>, <32 x i16> %shuf, <32 x i16> zeroinitializer
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
define <4 x i32> @test_i32_to_4(i32 %s) {
@@ -664,104 +712,112 @@ define <4 x i32> @test_i32_to_4(i32 %s) {
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_i32_to_4_mask0(i32 %s, <4 x i32> %default) {
+define <4 x i32> @test_masked_i32_to_4_mask0(i32 %s, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $2, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 1, i1 0, i1 0>, <4 x i32> %shuf, <4 x i32> %default
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_z_i32_to_4_mask0(i32 %s) {
+define <4 x i32> @test_masked_z_i32_to_4_mask0(i32 %s, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $2, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 1, i1 0, i1 0>, <4 x i32> %shuf, <4 x i32> zeroinitializer
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_i32_to_4_mask1(i32 %s, <4 x i32> %default) {
+define <4 x i32> @test_masked_i32_to_4_mask1(i32 %s, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $3, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 1, i1 0, i1 0>, <4 x i32> %shuf, <4 x i32> %default
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_z_i32_to_4_mask1(i32 %s) {
+define <4 x i32> @test_masked_z_i32_to_4_mask1(i32 %s, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $3, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 1, i1 0, i1 0>, <4 x i32> %shuf, <4 x i32> zeroinitializer
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_i32_to_4_mask2(i32 %s, <4 x i32> %default) {
+define <4 x i32> @test_masked_i32_to_4_mask2(i32 %s, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $10, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 1, i1 0, i1 1>, <4 x i32> %shuf, <4 x i32> %default
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_z_i32_to_4_mask2(i32 %s) {
+define <4 x i32> @test_masked_z_i32_to_4_mask2(i32 %s, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $10, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 1, i1 0, i1 1>, <4 x i32> %shuf, <4 x i32> zeroinitializer
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_i32_to_4_mask3(i32 %s, <4 x i32> %default) {
+define <4 x i32> @test_masked_i32_to_4_mask3(i32 %s, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $4, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 0, i1 1, i1 0>, <4 x i32> %shuf, <4 x i32> %default
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_z_i32_to_4_mask3(i32 %s) {
+define <4 x i32> @test_masked_z_i32_to_4_mask3(i32 %s, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $4, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 0, i1 1, i1 0>, <4 x i32> %shuf, <4 x i32> zeroinitializer
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
ret <4 x i32> %res
}
define <8 x i32> @test_i32_to_8(i32 %s) {
@@ -773,104 +829,112 @@ define <8 x i32> @test_i32_to_8(i32 %s) {
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_i32_to_8_mask0(i32 %s, <8 x i32> %default) {
+define <8 x i32> @test_masked_i32_to_8_mask0(i32 %s, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-48, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1>, <8 x i32> %shuf, <8 x i32> %default
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_z_i32_to_8_mask0(i32 %s) {
+define <8 x i32> @test_masked_z_i32_to_8_mask0(i32 %s, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-48, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1>, <8 x i32> %shuf, <8 x i32> zeroinitializer
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_i32_to_8_mask1(i32 %s, <8 x i32> %default) {
+define <8 x i32> @test_masked_i32_to_8_mask1(i32 %s, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $26, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0>, <8 x i32> %shuf, <8 x i32> %default
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_z_i32_to_8_mask1(i32 %s) {
+define <8 x i32> @test_masked_z_i32_to_8_mask1(i32 %s, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $26, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0>, <8 x i32> %shuf, <8 x i32> zeroinitializer
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_i32_to_8_mask2(i32 %s, <8 x i32> %default) {
+define <8 x i32> @test_masked_i32_to_8_mask2(i32 %s, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $38, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <8 x i32> %shuf, <8 x i32> %default
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_z_i32_to_8_mask2(i32 %s) {
+define <8 x i32> @test_masked_z_i32_to_8_mask2(i32 %s, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $38, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <8 x i32> %shuf, <8 x i32> zeroinitializer
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_i32_to_8_mask3(i32 %s, <8 x i32> %default) {
+define <8 x i32> @test_masked_i32_to_8_mask3(i32 %s, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-78, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1>, <8 x i32> %shuf, <8 x i32> %default
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_z_i32_to_8_mask3(i32 %s) {
+define <8 x i32> @test_masked_z_i32_to_8_mask3(i32 %s, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-78, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1>, <8 x i32> %shuf, <8 x i32> zeroinitializer
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
define <16 x i32> @test_i32_to_16(i32 %s) {
@@ -882,104 +946,112 @@ define <16 x i32> @test_i32_to_16(i32 %s) {
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_i32_to_16_mask0(i32 %s, <16 x i32> %default) {
+define <16 x i32> @test_masked_i32_to_16_mask0(i32 %s, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $2965, %ax # imm = 0xB95
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0>, <16 x i32> %shuf, <16 x i32> %default
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_z_i32_to_16_mask0(i32 %s) {
+define <16 x i32> @test_masked_z_i32_to_16_mask0(i32 %s, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $2965, %ax # imm = 0xB95
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0>, <16 x i32> %shuf, <16 x i32> zeroinitializer
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_i32_to_16_mask1(i32 %s, <16 x i32> %default) {
+define <16 x i32> @test_masked_i32_to_16_mask1(i32 %s, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-27928, %ax # imm = 0x92E8
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1>, <16 x i32> %shuf, <16 x i32> %default
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_z_i32_to_16_mask1(i32 %s) {
+define <16 x i32> @test_masked_z_i32_to_16_mask1(i32 %s, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-27928, %ax # imm = 0x92E8
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1>, <16 x i32> %shuf, <16 x i32> zeroinitializer
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_i32_to_16_mask2(i32 %s, <16 x i32> %default) {
+define <16 x i32> @test_masked_i32_to_16_mask2(i32 %s, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-5899, %ax # imm = 0xE8F5
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1>, <16 x i32> %shuf, <16 x i32> %default
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_z_i32_to_16_mask2(i32 %s) {
+define <16 x i32> @test_masked_z_i32_to_16_mask2(i32 %s, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-5899, %ax # imm = 0xE8F5
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1>, <16 x i32> %shuf, <16 x i32> zeroinitializer
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_i32_to_16_mask3(i32 %s, <16 x i32> %default) {
+define <16 x i32> @test_masked_i32_to_16_mask3(i32 %s, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-30527, %ax # imm = 0x88C1
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1>, <16 x i32> %shuf, <16 x i32> %default
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_z_i32_to_16_mask3(i32 %s) {
+define <16 x i32> @test_masked_z_i32_to_16_mask3(i32 %s, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-30527, %ax # imm = 0x88C1
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1>, <16 x i32> %shuf, <16 x i32> zeroinitializer
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
define <2 x i64> @test_i64_to_2(i64 %s) {
@@ -991,54 +1063,58 @@ define <2 x i64> @test_i64_to_2(i64 %s) {
%res = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
ret <2 x i64> %res
}
-define <2 x i64> @test_masked_i64_to_2_mask0(i64 %s, <2 x i64> %default) {
+define <2 x i64> @test_masked_i64_to_2_mask0(i64 %s, <2 x i64> %default, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_2_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $2, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
- %res = select <2 x i1> <i1 0, i1 1>, <2 x i64> %shuf, <2 x i64> %default
+ %cmp = icmp eq <2 x i64> %mask, zeroinitializer
+ %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default
ret <2 x i64> %res
}
-define <2 x i64> @test_masked_z_i64_to_2_mask0(i64 %s) {
+define <2 x i64> @test_masked_z_i64_to_2_mask0(i64 %s, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_2_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $2, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
- %res = select <2 x i1> <i1 0, i1 1>, <2 x i64> %shuf, <2 x i64> zeroinitializer
+ %cmp = icmp eq <2 x i64> %mask, zeroinitializer
+ %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer
ret <2 x i64> %res
}
-define <2 x i64> @test_masked_i64_to_2_mask1(i64 %s, <2 x i64> %default) {
+define <2 x i64> @test_masked_i64_to_2_mask1(i64 %s, <2 x i64> %default, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_2_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
- %res = select <2 x i1> <i1 1, i1 0>, <2 x i64> %shuf, <2 x i64> %default
+ %cmp = icmp eq <2 x i64> %mask, zeroinitializer
+ %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default
ret <2 x i64> %res
}
-define <2 x i64> @test_masked_z_i64_to_2_mask1(i64 %s) {
+define <2 x i64> @test_masked_z_i64_to_2_mask1(i64 %s, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_2_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
- %res = select <2 x i1> <i1 1, i1 0>, <2 x i64> %shuf, <2 x i64> zeroinitializer
+ %cmp = icmp eq <2 x i64> %mask, zeroinitializer
+ %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer
ret <2 x i64> %res
}
define <4 x i64> @test_i64_to_4(i64 %s) {
@@ -1050,104 +1126,112 @@ define <4 x i64> @test_i64_to_4(i64 %s) {
%res = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_i64_to_4_mask0(i64 %s, <4 x i64> %default) {
+define <4 x i64> @test_masked_i64_to_4_mask0(i64 %s, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $9, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x i64> %shuf, <4 x i64> %default
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_z_i64_to_4_mask0(i64 %s) {
+define <4 x i64> @test_masked_z_i64_to_4_mask0(i64 %s, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $9, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x i64> %shuf, <4 x i64> zeroinitializer
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_i64_to_4_mask1(i64 %s, <4 x i64> %default) {
+define <4 x i64> @test_masked_i64_to_4_mask1(i64 %s, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $8, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 0, i1 0, i1 1>, <4 x i64> %shuf, <4 x i64> %default
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_z_i64_to_4_mask1(i64 %s) {
+define <4 x i64> @test_masked_z_i64_to_4_mask1(i64 %s, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $8, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 0, i1 0, i1 1>, <4 x i64> %shuf, <4 x i64> zeroinitializer
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_i64_to_4_mask2(i64 %s, <4 x i64> %default) {
+define <4 x i64> @test_masked_i64_to_4_mask2(i64 %s, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $7, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x i64> %shuf, <4 x i64> %default
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_z_i64_to_4_mask2(i64 %s) {
+define <4 x i64> @test_masked_z_i64_to_4_mask2(i64 %s, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $7, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x i64> %shuf, <4 x i64> zeroinitializer
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_i64_to_4_mask3(i64 %s, <4 x i64> %default) {
+define <4 x i64> @test_masked_i64_to_4_mask3(i64 %s, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $5, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i64> %shuf, <4 x i64> %default
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_z_i64_to_4_mask3(i64 %s) {
+define <4 x i64> @test_masked_z_i64_to_4_mask3(i64 %s, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $5, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i64> %shuf, <4 x i64> zeroinitializer
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
ret <4 x i64> %res
}
define <8 x i64> @test_i64_to_8(i64 %s) {
@@ -1159,104 +1243,112 @@ define <8 x i64> @test_i64_to_8(i64 %s) {
%res = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_i64_to_8_mask0(i64 %s, <8 x i64> %default) {
+define <8 x i64> @test_masked_i64_to_8_mask0(i64 %s, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $95, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0>, <8 x i64> %shuf, <8 x i64> %default
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_z_i64_to_8_mask0(i64 %s) {
+define <8 x i64> @test_masked_z_i64_to_8_mask0(i64 %s, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $95, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0>, <8 x i64> %shuf, <8 x i64> zeroinitializer
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_i64_to_8_mask1(i64 %s, <8 x i64> %default) {
+define <8 x i64> @test_masked_i64_to_8_mask1(i64 %s, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-6, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i64> %shuf, <8 x i64> %default
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_z_i64_to_8_mask1(i64 %s) {
+define <8 x i64> @test_masked_z_i64_to_8_mask1(i64 %s, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-6, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i64> %shuf, <8 x i64> zeroinitializer
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_i64_to_8_mask2(i64 %s, <8 x i64> %default) {
+define <8 x i64> @test_masked_i64_to_8_mask2(i64 %s, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $10, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0>, <8 x i64> %shuf, <8 x i64> %default
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_z_i64_to_8_mask2(i64 %s) {
+define <8 x i64> @test_masked_z_i64_to_8_mask2(i64 %s, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $10, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0>, <8 x i64> %shuf, <8 x i64> zeroinitializer
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_i64_to_8_mask3(i64 %s, <8 x i64> %default) {
+define <8 x i64> @test_masked_i64_to_8_mask3(i64 %s, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $6, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x i64> %shuf, <8 x i64> %default
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_z_i64_to_8_mask3(i64 %s) {
+define <8 x i64> @test_masked_z_i64_to_8_mask3(i64 %s, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $6, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x i64> %shuf, <8 x i64> zeroinitializer
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
ret <8 x i64> %res
}
define <16 x i8> @test_i8_to_16_mem(i8* %p) {
@@ -1269,112 +1361,120 @@ define <16 x i8> @test_i8_to_16_mem(i8* %p) {
%res = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_i8_to_16_mem_mask0(i8* %p, <16 x i8> %default) {
+define <16 x i8> @test_masked_i8_to_16_mem_mask0(i8* %p, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-2555, %ax # imm = 0xF605
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1>, <16 x i8> %shuf, <16 x i8> %default
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_z_i8_to_16_mem_mask0(i8* %p) {
+define <16 x i8> @test_masked_z_i8_to_16_mem_mask0(i8* %p, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-2555, %ax # imm = 0xF605
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1>, <16 x i8> %shuf, <16 x i8> zeroinitializer
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_i8_to_16_mem_mask1(i8* %p, <16 x i8> %default) {
+define <16 x i8> @test_masked_i8_to_16_mem_mask1(i8* %p, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $12122, %ax # imm = 0x2F5A
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0>, <16 x i8> %shuf, <16 x i8> %default
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_z_i8_to_16_mem_mask1(i8* %p) {
+define <16 x i8> @test_masked_z_i8_to_16_mem_mask1(i8* %p, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $12122, %ax # imm = 0x2F5A
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0>, <16 x i8> %shuf, <16 x i8> zeroinitializer
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_i8_to_16_mem_mask2(i8* %p, <16 x i8> %default) {
+define <16 x i8> @test_masked_i8_to_16_mem_mask2(i8* %p, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $8120, %ax # imm = 0x1FB8
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0>, <16 x i8> %shuf, <16 x i8> %default
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_z_i8_to_16_mem_mask2(i8* %p) {
+define <16 x i8> @test_masked_z_i8_to_16_mem_mask2(i8* %p, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $8120, %ax # imm = 0x1FB8
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0>, <16 x i8> %shuf, <16 x i8> zeroinitializer
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_i8_to_16_mem_mask3(i8* %p, <16 x i8> %default) {
+define <16 x i8> @test_masked_i8_to_16_mem_mask3(i8* %p, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $13800, %ax # imm = 0x35E8
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0>, <16 x i8> %shuf, <16 x i8> %default
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
ret <16 x i8> %res
}
-define <16 x i8> @test_masked_z_i8_to_16_mem_mask3(i8* %p) {
+define <16 x i8> @test_masked_z_i8_to_16_mem_mask3(i8* %p, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $13800, %ax # imm = 0x35E8
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0>, <16 x i8> %shuf, <16 x i8> zeroinitializer
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
ret <16 x i8> %res
}
define <32 x i8> @test_i8_to_32_mem(i8* %p) {
@@ -1387,112 +1487,120 @@ define <32 x i8> @test_i8_to_32_mem(i8* %p) {
%res = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_i8_to_32_mem_mask0(i8* %p, <32 x i8> %default) {
+define <32 x i8> @test_masked_i8_to_32_mem_mask0(i8* %p, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-855786879, %eax # imm = 0xCCFDBA81
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1>, <32 x i8> %shuf, <32 x i8> %default
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_z_i8_to_32_mem_mask0(i8* %p) {
+define <32 x i8> @test_masked_z_i8_to_32_mem_mask0(i8* %p, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-855786879, %eax # imm = 0xCCFDBA81
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1>, <32 x i8> %shuf, <32 x i8> zeroinitializer
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_i8_to_32_mem_mask1(i8* %p, <32 x i8> %default) {
+define <32 x i8> @test_masked_i8_to_32_mem_mask1(i8* %p, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-270715404, %eax # imm = 0xEFDD35F4
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1>, <32 x i8> %shuf, <32 x i8> %default
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_z_i8_to_32_mem_mask1(i8* %p) {
+define <32 x i8> @test_masked_z_i8_to_32_mem_mask1(i8* %p, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-270715404, %eax # imm = 0xEFDD35F4
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1>, <32 x i8> %shuf, <32 x i8> zeroinitializer
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_i8_to_32_mem_mask2(i8* %p, <32 x i8> %default) {
+define <32 x i8> @test_masked_i8_to_32_mem_mask2(i8* %p, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $97850418, %eax # imm = 0x5D51432
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <32 x i8> %shuf, <32 x i8> %default
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_z_i8_to_32_mem_mask2(i8* %p) {
+define <32 x i8> @test_masked_z_i8_to_32_mem_mask2(i8* %p, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $97850418, %eax # imm = 0x5D51432
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <32 x i8> %shuf, <32 x i8> zeroinitializer
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_i8_to_32_mem_mask3(i8* %p, <32 x i8> %default) {
+define <32 x i8> @test_masked_i8_to_32_mem_mask3(i8* %p, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $1828018964, %eax # imm = 0x6CF55B14
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0>, <32 x i8> %shuf, <32 x i8> %default
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
ret <32 x i8> %res
}
-define <32 x i8> @test_masked_z_i8_to_32_mem_mask3(i8* %p) {
+define <32 x i8> @test_masked_z_i8_to_32_mem_mask3(i8* %p, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $1828018964, %eax # imm = 0x6CF55B14
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0>, <32 x i8> %shuf, <32 x i8> zeroinitializer
+ %cmp = icmp eq <32 x i8> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
ret <32 x i8> %res
}
define <64 x i8> @test_i8_to_64_mem(i8* %p) {
@@ -1505,112 +1613,120 @@ define <64 x i8> @test_i8_to_64_mem(i8* %p) {
%res = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_i8_to_64_mem_mask0(i8* %p, <64 x i8> %default) {
+define <64 x i8> @test_masked_i8_to_64_mem_mask0(i8* %p, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $468087142555171329, %rax # imm = 0x67EFAC6AFEDBA01
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> %default
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_z_i8_to_64_mem_mask0(i8* %p) {
+define <64 x i8> @test_masked_z_i8_to_64_mem_mask0(i8* %p, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $468087142555171329, %rax # imm = 0x67EFAC6AFEDBA01
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> zeroinitializer
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_i8_to_64_mem_mask1(i8* %p, <64 x i8> %default) {
+define <64 x i8> @test_masked_i8_to_64_mem_mask1(i8* %p, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $1198668921668790785, %rax # imm = 0x10A287088F5E6A01
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> %default
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_z_i8_to_64_mem_mask1(i8* %p) {
+define <64 x i8> @test_masked_z_i8_to_64_mem_mask1(i8* %p, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $1198668921668790785, %rax # imm = 0x10A287088F5E6A01
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> zeroinitializer
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_i8_to_64_mem_mask2(i8* %p, <64 x i8> %default) {
+define <64 x i8> @test_masked_i8_to_64_mem_mask2(i8* %p, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $1018395262988968961, %rax # imm = 0xE2211189365E401
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> %default
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_z_i8_to_64_mem_mask2(i8* %p) {
+define <64 x i8> @test_masked_z_i8_to_64_mem_mask2(i8* %p, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $1018395262988968961, %rax # imm = 0xE2211189365E401
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> zeroinitializer
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_i8_to_64_mem_mask3(i8* %p, <64 x i8> %default) {
+define <64 x i8> @test_masked_i8_to_64_mem_mask3(i8* %p, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $2983418297125630465, %rax # imm = 0x29673B226892F201
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqb %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> %default
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
ret <64 x i8> %res
}
-define <64 x i8> @test_masked_z_i8_to_64_mem_mask3(i8* %p) {
+define <64 x i8> @test_masked_z_i8_to_64_mem_mask3(i8* %p, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movabsq $2983418297125630465, %rax # imm = 0x29673B226892F201
-; CHECK-NEXT: kmovq %rax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastb (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i8, i8* %p
%vec = insertelement <2 x i8> undef, i8 %s, i32 0
%shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <64 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0>, <64 x i8> %shuf, <64 x i8> zeroinitializer
+ %cmp = icmp eq <64 x i8> %mask, zeroinitializer
+ %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
ret <64 x i8> %res
}
define <8 x i16> @test_i16_to_8_mem(i16* %p) {
@@ -1623,112 +1739,120 @@ define <8 x i16> @test_i16_to_8_mem(i16* %p) {
%res = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_i16_to_8_mem_mask0(i16* %p, <8 x i16> %default) {
+define <8 x i16> @test_masked_i16_to_8_mem_mask0(i16* %p, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $89, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0>, <8 x i16> %shuf, <8 x i16> %default
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_z_i16_to_8_mem_mask0(i16* %p) {
+define <8 x i16> @test_masked_z_i16_to_8_mem_mask0(i16* %p, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $89, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0>, <8 x i16> %shuf, <8 x i16> zeroinitializer
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_i16_to_8_mem_mask1(i16* %p, <8 x i16> %default) {
+define <8 x i16> @test_masked_i16_to_8_mem_mask1(i16* %p, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $46, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0>, <8 x i16> %shuf, <8 x i16> %default
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_z_i16_to_8_mem_mask1(i16* %p) {
+define <8 x i16> @test_masked_z_i16_to_8_mem_mask1(i16* %p, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $46, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0>, <8 x i16> %shuf, <8 x i16> zeroinitializer
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_i16_to_8_mem_mask2(i16* %p, <8 x i16> %default) {
+define <8 x i16> @test_masked_i16_to_8_mem_mask2(i16* %p, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-128, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x i16> %shuf, <8 x i16> %default
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_z_i16_to_8_mem_mask2(i16* %p) {
+define <8 x i16> @test_masked_z_i16_to_8_mem_mask2(i16* %p, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-128, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x i16> %shuf, <8 x i16> zeroinitializer
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_i16_to_8_mem_mask3(i16* %p, <8 x i16> %default) {
+define <8 x i16> @test_masked_i16_to_8_mem_mask3(i16* %p, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $11, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0>, <8 x i16> %shuf, <8 x i16> %default
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
ret <8 x i16> %res
}
-define <8 x i16> @test_masked_z_i16_to_8_mem_mask3(i16* %p) {
+define <8 x i16> @test_masked_z_i16_to_8_mem_mask3(i16* %p, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $11, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0>, <8 x i16> %shuf, <8 x i16> zeroinitializer
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
ret <8 x i16> %res
}
define <16 x i16> @test_i16_to_16_mem(i16* %p) {
@@ -1741,112 +1865,120 @@ define <16 x i16> @test_i16_to_16_mem(i16* %p) {
%res = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_i16_to_16_mem_mask0(i16* %p, <16 x i16> %default) {
+define <16 x i16> @test_masked_i16_to_16_mem_mask0(i16* %p, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $6614, %ax # imm = 0x19D6
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0>, <16 x i16> %shuf, <16 x i16> %default
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_z_i16_to_16_mem_mask0(i16* %p) {
+define <16 x i16> @test_masked_z_i16_to_16_mem_mask0(i16* %p, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $6614, %ax # imm = 0x19D6
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0>, <16 x i16> %shuf, <16 x i16> zeroinitializer
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_i16_to_16_mem_mask1(i16* %p, <16 x i16> %default) {
+define <16 x i16> @test_masked_i16_to_16_mem_mask1(i16* %p, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-772, %ax # imm = 0xFCFC
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <16 x i16> %shuf, <16 x i16> %default
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_z_i16_to_16_mem_mask1(i16* %p) {
+define <16 x i16> @test_masked_z_i16_to_16_mem_mask1(i16* %p, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $-772, %ax # imm = 0xFCFC
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <16 x i16> %shuf, <16 x i16> zeroinitializer
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_i16_to_16_mem_mask2(i16* %p, <16 x i16> %default) {
+define <16 x i16> @test_masked_i16_to_16_mem_mask2(i16* %p, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $13065, %ax # imm = 0x3309
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <16 x i16> %shuf, <16 x i16> %default
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_z_i16_to_16_mem_mask2(i16* %p) {
+define <16 x i16> @test_masked_z_i16_to_16_mem_mask2(i16* %p, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $13065, %ax # imm = 0x3309
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <16 x i16> %shuf, <16 x i16> zeroinitializer
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_i16_to_16_mem_mask3(i16* %p, <16 x i16> %default) {
+define <16 x i16> @test_masked_i16_to_16_mem_mask3(i16* %p, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $23498, %ax # imm = 0x5BCA
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0>, <16 x i16> %shuf, <16 x i16> %default
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
ret <16 x i16> %res
}
-define <16 x i16> @test_masked_z_i16_to_16_mem_mask3(i16* %p) {
+define <16 x i16> @test_masked_z_i16_to_16_mem_mask3(i16* %p, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $23498, %ax # imm = 0x5BCA
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0>, <16 x i16> %shuf, <16 x i16> zeroinitializer
+ %cmp = icmp eq <16 x i16> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
define <32 x i16> @test_i16_to_32_mem(i16* %p) {
@@ -1859,112 +1991,120 @@ define <32 x i16> @test_i16_to_32_mem(i16* %p) {
%res = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_i16_to_32_mem_mask0(i16* %p, <32 x i16> %default) {
+define <32 x i16> @test_masked_i16_to_32_mem_mask0(i16* %p, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-1152776498, %eax # imm = 0xBB4A06CE
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1>, <32 x i16> %shuf, <32 x i16> %default
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_z_i16_to_32_mem_mask0(i16* %p) {
+define <32 x i16> @test_masked_z_i16_to_32_mem_mask0(i16* %p, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-1152776498, %eax # imm = 0xBB4A06CE
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1>, <32 x i16> %shuf, <32 x i16> zeroinitializer
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_i16_to_32_mem_mask1(i16* %p, <32 x i16> %default) {
+define <32 x i16> @test_masked_i16_to_32_mem_mask1(i16* %p, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-694382116, %eax # imm = 0xD69C91DC
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1>, <32 x i16> %shuf, <32 x i16> %default
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_z_i16_to_32_mem_mask1(i16* %p) {
+define <32 x i16> @test_masked_z_i16_to_32_mem_mask1(i16* %p, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-694382116, %eax # imm = 0xD69C91DC
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1>, <32 x i16> %shuf, <32 x i16> zeroinitializer
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_i16_to_32_mem_mask2(i16* %p, <32 x i16> %default) {
+define <32 x i16> @test_masked_i16_to_32_mem_mask2(i16* %p, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-350116879, %eax # imm = 0xEB21A3F1
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1>, <32 x i16> %shuf, <32 x i16> %default
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_z_i16_to_32_mem_mask2(i16* %p) {
+define <32 x i16> @test_masked_z_i16_to_32_mem_mask2(i16* %p, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-350116879, %eax # imm = 0xEB21A3F1
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1>, <32 x i16> %shuf, <32 x i16> zeroinitializer
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_i16_to_32_mem_mask3(i16* %p, <32 x i16> %default) {
+define <32 x i16> @test_masked_i16_to_32_mem_mask3(i16* %p, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $727673142, %eax # imm = 0x2B5F6936
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqw %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0>, <32 x i16> %shuf, <32 x i16> %default
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
ret <32 x i16> %res
}
-define <32 x i16> @test_masked_z_i16_to_32_mem_mask3(i16* %p) {
+define <32 x i16> @test_masked_z_i16_to_32_mem_mask3(i16* %p, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $727673142, %eax # imm = 0x2B5F6936
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastw (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i16, i16* %p
%vec = insertelement <2 x i16> undef, i16 %s, i32 0
%shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <32 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0>, <32 x i16> %shuf, <32 x i16> zeroinitializer
+ %cmp = icmp eq <32 x i16> %mask, zeroinitializer
+ %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
define <4 x i32> @test_i32_to_4_mem(i32* %p) {
@@ -1977,112 +2117,120 @@ define <4 x i32> @test_i32_to_4_mem(i32* %p) {
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_i32_to_4_mem_mask0(i32* %p, <4 x i32> %default) {
+define <4 x i32> @test_masked_i32_to_4_mem_mask0(i32* %p, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $10, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 1, i1 0, i1 1>, <4 x i32> %shuf, <4 x i32> %default
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_z_i32_to_4_mem_mask0(i32* %p) {
+define <4 x i32> @test_masked_z_i32_to_4_mem_mask0(i32* %p, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $10, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 1, i1 0, i1 1>, <4 x i32> %shuf, <4 x i32> zeroinitializer
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_i32_to_4_mem_mask1(i32* %p, <4 x i32> %default) {
+define <4 x i32> @test_masked_i32_to_4_mem_mask1(i32* %p, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $13, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 0, i1 1, i1 1>, <4 x i32> %shuf, <4 x i32> %default
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_z_i32_to_4_mem_mask1(i32* %p) {
+define <4 x i32> @test_masked_z_i32_to_4_mem_mask1(i32* %p, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $13, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 0, i1 1, i1 1>, <4 x i32> %shuf, <4 x i32> zeroinitializer
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_i32_to_4_mem_mask2(i32* %p, <4 x i32> %default) {
+define <4 x i32> @test_masked_i32_to_4_mem_mask2(i32* %p, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 0, i1 0, i1 0>, <4 x i32> %shuf, <4 x i32> %default
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_z_i32_to_4_mem_mask2(i32* %p) {
+define <4 x i32> @test_masked_z_i32_to_4_mem_mask2(i32* %p, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 0, i1 0, i1 0>, <4 x i32> %shuf, <4 x i32> zeroinitializer
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_i32_to_4_mem_mask3(i32* %p, <4 x i32> %default) {
+define <4 x i32> @test_masked_i32_to_4_mem_mask3(i32* %p, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $8, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 0, i1 0, i1 1>, <4 x i32> %shuf, <4 x i32> %default
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
ret <4 x i32> %res
}
-define <4 x i32> @test_masked_z_i32_to_4_mem_mask3(i32* %p) {
+define <4 x i32> @test_masked_z_i32_to_4_mem_mask3(i32* %p, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $8, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 0, i1 0, i1 1>, <4 x i32> %shuf, <4 x i32> zeroinitializer
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
ret <4 x i32> %res
}
define <8 x i32> @test_i32_to_8_mem(i32* %p) {
@@ -2095,112 +2243,120 @@ define <8 x i32> @test_i32_to_8_mem(i32* %p) {
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_i32_to_8_mem_mask0(i32* %p, <8 x i32> %default) {
+define <8 x i32> @test_masked_i32_to_8_mem_mask0(i32* %p, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-41, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1>, <8 x i32> %shuf, <8 x i32> %default
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_z_i32_to_8_mem_mask0(i32* %p) {
+define <8 x i32> @test_masked_z_i32_to_8_mem_mask0(i32* %p, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-41, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1>, <8 x i32> %shuf, <8 x i32> zeroinitializer
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_i32_to_8_mem_mask1(i32* %p, <8 x i32> %default) {
+define <8 x i32> @test_masked_i32_to_8_mem_mask1(i32* %p, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $87, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, <8 x i32> %shuf, <8 x i32> %default
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_z_i32_to_8_mem_mask1(i32* %p) {
+define <8 x i32> @test_masked_z_i32_to_8_mem_mask1(i32* %p, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $87, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, <8 x i32> %shuf, <8 x i32> zeroinitializer
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_i32_to_8_mem_mask2(i32* %p, <8 x i32> %default) {
+define <8 x i32> @test_masked_i32_to_8_mem_mask2(i32* %p, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $64, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0>, <8 x i32> %shuf, <8 x i32> %default
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_z_i32_to_8_mem_mask2(i32* %p) {
+define <8 x i32> @test_masked_z_i32_to_8_mem_mask2(i32* %p, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $64, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0>, <8 x i32> %shuf, <8 x i32> zeroinitializer
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_i32_to_8_mem_mask3(i32* %p, <8 x i32> %default) {
+define <8 x i32> @test_masked_i32_to_8_mem_mask3(i32* %p, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-104, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1>, <8 x i32> %shuf, <8 x i32> %default
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
ret <8 x i32> %res
}
-define <8 x i32> @test_masked_z_i32_to_8_mem_mask3(i32* %p) {
+define <8 x i32> @test_masked_z_i32_to_8_mem_mask3(i32* %p, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-104, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1>, <8 x i32> %shuf, <8 x i32> zeroinitializer
+ %cmp = icmp eq <8 x i32> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
ret <8 x i32> %res
}
define <16 x i32> @test_i32_to_16_mem(i32* %p) {
@@ -2213,112 +2369,120 @@ define <16 x i32> @test_i32_to_16_mem(i32* %p) {
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_i32_to_16_mem_mask0(i32* %p, <16 x i32> %default) {
+define <16 x i32> @test_masked_i32_to_16_mem_mask0(i32* %p, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $28987, %ax # imm = 0x713B
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0>, <16 x i32> %shuf, <16 x i32> %default
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_z_i32_to_16_mem_mask0(i32* %p) {
+define <16 x i32> @test_masked_z_i32_to_16_mem_mask0(i32* %p, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $28987, %ax # imm = 0x713B
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0>, <16 x i32> %shuf, <16 x i32> zeroinitializer
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_i32_to_16_mem_mask1(i32* %p, <16 x i32> %default) {
+define <16 x i32> @test_masked_i32_to_16_mem_mask1(i32* %p, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $11457, %ax # imm = 0x2CC1
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0>, <16 x i32> %shuf, <16 x i32> %default
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_z_i32_to_16_mem_mask1(i32* %p) {
+define <16 x i32> @test_masked_z_i32_to_16_mem_mask1(i32* %p, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $11457, %ax # imm = 0x2CC1
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0>, <16 x i32> %shuf, <16 x i32> zeroinitializer
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_i32_to_16_mem_mask2(i32* %p, <16 x i32> %default) {
+define <16 x i32> @test_masked_i32_to_16_mem_mask2(i32* %p, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $30908, %ax # imm = 0x78BC
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0>, <16 x i32> %shuf, <16 x i32> %default
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_z_i32_to_16_mem_mask2(i32* %p) {
+define <16 x i32> @test_masked_z_i32_to_16_mem_mask2(i32* %p, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $30908, %ax # imm = 0x78BC
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0>, <16 x i32> %shuf, <16 x i32> zeroinitializer
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_i32_to_16_mem_mask3(i32* %p, <16 x i32> %default) {
+define <16 x i32> @test_masked_i32_to_16_mem_mask3(i32* %p, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $26863, %ax # imm = 0x68EF
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0>, <16 x i32> %shuf, <16 x i32> %default
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
ret <16 x i32> %res
}
-define <16 x i32> @test_masked_z_i32_to_16_mem_mask3(i32* %p) {
+define <16 x i32> @test_masked_z_i32_to_16_mem_mask3(i32* %p, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movw $26863, %ax # imm = 0x68EF
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i32, i32* %p
%vec = insertelement <2 x i32> undef, i32 %s, i32 0
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0>, <16 x i32> %shuf, <16 x i32> zeroinitializer
+ %cmp = icmp eq <16 x i32> %mask, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
ret <16 x i32> %res
}
define <2 x i64> @test_i64_to_2_mem(i64* %p) {
@@ -2331,58 +2495,62 @@ define <2 x i64> @test_i64_to_2_mem(i64* %p) {
%res = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
ret <2 x i64> %res
}
-define <2 x i64> @test_masked_i64_to_2_mem_mask0(i64* %p, <2 x i64> %default) {
+define <2 x i64> @test_masked_i64_to_2_mem_mask0(i64* %p, <2 x i64> %default, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_2_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $2, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
- %res = select <2 x i1> <i1 0, i1 1>, <2 x i64> %shuf, <2 x i64> %default
+ %cmp = icmp eq <2 x i64> %mask, zeroinitializer
+ %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default
ret <2 x i64> %res
}
-define <2 x i64> @test_masked_z_i64_to_2_mem_mask0(i64* %p) {
+define <2 x i64> @test_masked_z_i64_to_2_mem_mask0(i64* %p, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_2_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $2, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
- %res = select <2 x i1> <i1 0, i1 1>, <2 x i64> %shuf, <2 x i64> zeroinitializer
+ %cmp = icmp eq <2 x i64> %mask, zeroinitializer
+ %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer
ret <2 x i64> %res
}
-define <2 x i64> @test_masked_i64_to_2_mem_mask1(i64* %p, <2 x i64> %default) {
+define <2 x i64> @test_masked_i64_to_2_mem_mask1(i64* %p, <2 x i64> %default, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_2_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
- %res = select <2 x i1> <i1 1, i1 0>, <2 x i64> %shuf, <2 x i64> %default
+ %cmp = icmp eq <2 x i64> %mask, zeroinitializer
+ %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default
ret <2 x i64> %res
}
-define <2 x i64> @test_masked_z_i64_to_2_mem_mask1(i64* %p) {
+define <2 x i64> @test_masked_z_i64_to_2_mem_mask1(i64* %p, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_2_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
- %res = select <2 x i1> <i1 1, i1 0>, <2 x i64> %shuf, <2 x i64> zeroinitializer
+ %cmp = icmp eq <2 x i64> %mask, zeroinitializer
+ %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer
ret <2 x i64> %res
}
define <4 x i64> @test_i64_to_4_mem(i64* %p) {
@@ -2395,112 +2563,120 @@ define <4 x i64> @test_i64_to_4_mem(i64* %p) {
%res = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_i64_to_4_mem_mask0(i64* %p, <4 x i64> %default) {
+define <4 x i64> @test_masked_i64_to_4_mem_mask0(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $5, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i64> %shuf, <4 x i64> %default
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_z_i64_to_4_mem_mask0(i64* %p) {
+define <4 x i64> @test_masked_z_i64_to_4_mem_mask0(i64* %p, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $5, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i64> %shuf, <4 x i64> zeroinitializer
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_i64_to_4_mem_mask1(i64* %p, <4 x i64> %default) {
+define <4 x i64> @test_masked_i64_to_4_mem_mask1(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $14, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 1, i1 1, i1 1>, <4 x i64> %shuf, <4 x i64> %default
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_z_i64_to_4_mem_mask1(i64* %p) {
+define <4 x i64> @test_masked_z_i64_to_4_mem_mask1(i64* %p, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $14, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 0, i1 1, i1 1, i1 1>, <4 x i64> %shuf, <4 x i64> zeroinitializer
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_i64_to_4_mem_mask2(i64* %p, <4 x i64> %default) {
+define <4 x i64> @test_masked_i64_to_4_mem_mask2(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $11, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 1, i1 0, i1 1>, <4 x i64> %shuf, <4 x i64> %default
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_z_i64_to_4_mem_mask2(i64* %p) {
+define <4 x i64> @test_masked_z_i64_to_4_mem_mask2(i64* %p, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $11, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 1, i1 0, i1 1>, <4 x i64> %shuf, <4 x i64> zeroinitializer
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_i64_to_4_mem_mask3(i64* %p, <4 x i64> %default) {
+define <4 x i64> @test_masked_i64_to_4_mem_mask3(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $3, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 1, i1 0, i1 0>, <4 x i64> %shuf, <4 x i64> %default
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
ret <4 x i64> %res
}
-define <4 x i64> @test_masked_z_i64_to_4_mem_mask3(i64* %p) {
+define <4 x i64> @test_masked_z_i64_to_4_mem_mask3(i64* %p, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $3, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
- %res = select <4 x i1> <i1 1, i1 1, i1 0, i1 0>, <4 x i64> %shuf, <4 x i64> zeroinitializer
+ %cmp = icmp eq <4 x i64> %mask, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
ret <4 x i64> %res
}
define <8 x i64> @test_i64_to_8_mem(i64* %p) {
@@ -2513,111 +2689,119 @@ define <8 x i64> @test_i64_to_8_mem(i64* %p) {
%res = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_i64_to_8_mem_mask0(i64* %p, <8 x i64> %default) {
+define <8 x i64> @test_masked_i64_to_8_mem_mask0(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-113, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1>, <8 x i64> %shuf, <8 x i64> %default
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_z_i64_to_8_mem_mask0(i64* %p) {
+define <8 x i64> @test_masked_z_i64_to_8_mem_mask0(i64* %p, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-113, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1>, <8 x i64> %shuf, <8 x i64> zeroinitializer
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_i64_to_8_mem_mask1(i64* %p, <8 x i64> %default) {
+define <8 x i64> @test_masked_i64_to_8_mem_mask1(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $4, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x i64> %shuf, <8 x i64> %default
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_z_i64_to_8_mem_mask1(i64* %p) {
+define <8 x i64> @test_masked_z_i64_to_8_mem_mask1(i64* %p, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask1:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $4, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x i64> %shuf, <8 x i64> zeroinitializer
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_i64_to_8_mem_mask2(i64* %p, <8 x i64> %default) {
+define <8 x i64> @test_masked_i64_to_8_mem_mask2(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-67, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1>, <8 x i64> %shuf, <8 x i64> %default
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_z_i64_to_8_mem_mask2(i64* %p) {
+define <8 x i64> @test_masked_z_i64_to_8_mem_mask2(i64* %p, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $-67, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1>, <8 x i64> %shuf, <8 x i64> zeroinitializer
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_i64_to_8_mem_mask3(i64* %p, <8 x i64> %default) {
+define <8 x i64> @test_masked_i64_to_8_mem_mask3(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $86, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, <8 x i64> %shuf, <8 x i64> %default
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
ret <8 x i64> %res
}
-define <8 x i64> @test_masked_z_i64_to_8_mem_mask3(i64* %p) {
+define <8 x i64> @test_masked_z_i64_to_8_mem_mask3(i64* %p, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: movb $86, %al
-; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpbroadcastq (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
%shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, <8 x i64> %shuf, <8 x i64> zeroinitializer
+ %cmp = icmp eq <8 x i64> %mask, zeroinitializer
+ %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
ret <8 x i64> %res
}
OpenPOWER on IntegriCloud