summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/X86/masked_load.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/masked_load.ll')
-rw-r--r-- llvm/test/CodeGen/X86/masked_load.ll 304
1 files changed, 158 insertions, 146 deletions
diff --git a/llvm/test/CodeGen/X86/masked_load.ll b/llvm/test/CodeGen/X86/masked_load.ll
index dcae1627824..271e2403a5d 100644
--- a/llvm/test/CodeGen/X86/masked_load.ll
+++ b/llvm/test/CodeGen/X86/masked_load.ll
@@ -458,40 +458,38 @@ define <8 x double> @load_v8f64_v8i16(<8 x i16> %trigger, <8 x double>* %addr, <
;
; AVX1-LABEL: load_v8f64_v8i16:
; AVX1: ## %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpmovsxwd %xmm3, %xmm3
-; AVX1-NEXT: vpmovsxdq %xmm3, %xmm5
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; AVX1-NEXT: vpmovsxdq %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
-; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
-; AVX1-NEXT: vpmovsxdq %xmm0, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpmovsxdq %xmm4, %xmm5
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
+; AVX1-NEXT: vpmovsxdq %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpmovsxdq %xmm0, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
-; AVX1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm4
-; AVX1-NEXT: vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
-; AVX1-NEXT: vmaskmovpd 32(%rdi), %ymm3, %ymm1
-; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
+; AVX1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm3
+; AVX1-NEXT: vblendvpd %ymm0, %ymm3, %ymm1, %ymm0
+; AVX1-NEXT: vmaskmovpd 32(%rdi), %ymm4, %ymm1
+; AVX1-NEXT: vblendvpd %ymm4, %ymm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_v8f64_v8i16:
; AVX2: ## %bb.0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX2-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vpmovsxwd %xmm3, %xmm3
-; AVX2-NEXT: vpmovsxdq %xmm3, %ymm3
-; AVX2-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
-; AVX2-NEXT: vpmovsxwd %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm4, %xmm4
+; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
-; AVX2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm4
-; AVX2-NEXT: vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
-; AVX2-NEXT: vmaskmovpd 32(%rdi), %ymm3, %ymm1
-; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm3
+; AVX2-NEXT: vblendvpd %ymm0, %ymm3, %ymm1, %ymm0
+; AVX2-NEXT: vmaskmovpd 32(%rdi), %ymm4, %ymm1
+; AVX2-NEXT: vblendvpd %ymm4, %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: load_v8f64_v8i16:
@@ -725,9 +723,11 @@ define <8 x double> @load_v8f64_v8i64(<8 x i64> %trigger, <8 x double>* %addr, <
define <2 x float> @load_v2f32_v2i32(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
; SSE2-LABEL: load_v2f32_v2i32:
; SSE2: ## %bb.0:
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2]
+; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: movmskpd %xmm0, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: jne LBB7_1
@@ -753,8 +753,8 @@ define <2 x float> @load_v2f32_v2i32(<2 x i32> %trigger, <2 x float>* %addr, <2
; SSE42-LABEL: load_v2f32_v2i32:
; SSE42: ## %bb.0:
; SSE42-NEXT: pxor %xmm2, %xmm2
-; SSE42-NEXT: pcmpeqd %xmm0, %xmm2
-; SSE42-NEXT: pmovsxdq %xmm2, %xmm0
+; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; SSE42-NEXT: pcmpeqq %xmm2, %xmm0
; SSE42-NEXT: movmskpd %xmm0, %eax
; SSE42-NEXT: testb $1, %al
; SSE42-NEXT: jne LBB7_1
@@ -774,20 +774,32 @@ define <2 x float> @load_v2f32_v2i32(<2 x i32> %trigger, <2 x float>* %addr, <2
; SSE42-NEXT: movaps %xmm1, %xmm0
; SSE42-NEXT: retq
;
-; AVX1OR2-LABEL: load_v2f32_v2i32:
-; AVX1OR2: ## %bb.0:
-; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1OR2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
-; AVX1OR2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
-; AVX1OR2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
-; AVX1OR2-NEXT: retq
+; AVX1-LABEL: load_v2f32_v2i32:
+; AVX1: ## %bb.0:
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; AVX1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
+; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_v2f32_v2i32:
+; AVX2: ## %bb.0:
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; AVX2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
+; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX2-NEXT: retq
;
; AVX512F-LABEL: load_v2f32_v2i32:
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k0
; AVX512F-NEXT: kshiftrw $14, %k0, %k1
; AVX512F-NEXT: vblendmps (%rdi), %zmm1, %zmm0 {%k1}
@@ -795,21 +807,13 @@ define <2 x float> @load_v2f32_v2i32(<2 x i32> %trigger, <2 x float>* %addr, <2
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
-; AVX512VLDQ-LABEL: load_v2f32_v2i32:
-; AVX512VLDQ: ## %bb.0:
-; AVX512VLDQ-NEXT: vptestnmd %xmm0, %xmm0, %k0
-; AVX512VLDQ-NEXT: kshiftlb $6, %k0, %k0
-; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1
-; AVX512VLDQ-NEXT: vblendmps (%rdi), %xmm1, %xmm0 {%k1}
-; AVX512VLDQ-NEXT: retq
-;
-; AVX512VLBW-LABEL: load_v2f32_v2i32:
-; AVX512VLBW: ## %bb.0:
-; AVX512VLBW-NEXT: vptestnmd %xmm0, %xmm0, %k0
-; AVX512VLBW-NEXT: kshiftlw $14, %k0, %k0
-; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k1
-; AVX512VLBW-NEXT: vblendmps (%rdi), %xmm1, %xmm0 {%k1}
-; AVX512VLBW-NEXT: retq
+; AVX512VL-LABEL: load_v2f32_v2i32:
+; AVX512VL: ## %bb.0:
+; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k1
+; AVX512VL-NEXT: vblendmps (%rdi), %xmm1, %xmm0 {%k1}
+; AVX512VL-NEXT: retq
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
%res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
ret <2 x float> %res
@@ -818,9 +822,11 @@ define <2 x float> @load_v2f32_v2i32(<2 x i32> %trigger, <2 x float>* %addr, <2
define <2 x float> @load_v2f32_v2i32_undef(<2 x i32> %trigger, <2 x float>* %addr) {
; SSE2-LABEL: load_v2f32_v2i32_undef:
; SSE2: ## %bb.0:
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
+; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: movmskpd %xmm0, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: ## implicit-def: $xmm0
@@ -844,8 +850,8 @@ define <2 x float> @load_v2f32_v2i32_undef(<2 x i32> %trigger, <2 x float>* %add
; SSE42-LABEL: load_v2f32_v2i32_undef:
; SSE42: ## %bb.0:
; SSE42-NEXT: pxor %xmm1, %xmm1
-; SSE42-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE42-NEXT: pmovsxdq %xmm1, %xmm0
+; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; SSE42-NEXT: pcmpeqq %xmm1, %xmm0
; SSE42-NEXT: movmskpd %xmm0, %eax
; SSE42-NEXT: testb $1, %al
; SSE42-NEXT: ## implicit-def: $xmm0
@@ -863,18 +869,29 @@ define <2 x float> @load_v2f32_v2i32_undef(<2 x i32> %trigger, <2 x float>* %add
; SSE42-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; SSE42-NEXT: retq
;
-; AVX1OR2-LABEL: load_v2f32_v2i32_undef:
-; AVX1OR2: ## %bb.0:
-; AVX1OR2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
-; AVX1OR2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0
-; AVX1OR2-NEXT: retq
+; AVX1-LABEL: load_v2f32_v2i32_undef:
+; AVX1: ## %bb.0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; AVX1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_v2f32_v2i32_undef:
+; AVX2: ## %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; AVX2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0
+; AVX2-NEXT: retq
;
; AVX512F-LABEL: load_v2f32_v2i32_undef:
; AVX512F: ## %bb.0:
-; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k0
; AVX512F-NEXT: kshiftrw $14, %k0, %k1
; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z}
@@ -882,21 +899,13 @@ define <2 x float> @load_v2f32_v2i32_undef(<2 x i32> %trigger, <2 x float>* %add
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
-; AVX512VLDQ-LABEL: load_v2f32_v2i32_undef:
-; AVX512VLDQ: ## %bb.0:
-; AVX512VLDQ-NEXT: vptestnmd %xmm0, %xmm0, %k0
-; AVX512VLDQ-NEXT: kshiftlb $6, %k0, %k0
-; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1
-; AVX512VLDQ-NEXT: vmovups (%rdi), %xmm0 {%k1} {z}
-; AVX512VLDQ-NEXT: retq
-;
-; AVX512VLBW-LABEL: load_v2f32_v2i32_undef:
-; AVX512VLBW: ## %bb.0:
-; AVX512VLBW-NEXT: vptestnmd %xmm0, %xmm0, %k0
-; AVX512VLBW-NEXT: kshiftlw $14, %k0, %k0
-; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k1
-; AVX512VLBW-NEXT: vmovups (%rdi), %xmm0 {%k1} {z}
-; AVX512VLBW-NEXT: retq
+; AVX512VL-LABEL: load_v2f32_v2i32_undef:
+; AVX512VL: ## %bb.0:
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k1
+; AVX512VL-NEXT: vmovups (%rdi), %xmm0 {%k1} {z}
+; AVX512VL-NEXT: retq
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
%res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float>undef)
ret <2 x float> %res
@@ -1783,40 +1792,38 @@ define <8 x i64> @load_v8i64_v8i16(<8 x i16> %trigger, <8 x i64>* %addr, <8 x i6
;
; AVX1-LABEL: load_v8i64_v8i16:
; AVX1: ## %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpmovsxwd %xmm3, %xmm3
-; AVX1-NEXT: vpmovsxdq %xmm3, %xmm5
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; AVX1-NEXT: vpmovsxdq %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
-; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
-; AVX1-NEXT: vpmovsxdq %xmm0, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpmovsxdq %xmm4, %xmm5
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
+; AVX1-NEXT: vpmovsxdq %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpmovsxdq %xmm0, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
-; AVX1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm4
-; AVX1-NEXT: vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
-; AVX1-NEXT: vmaskmovpd 32(%rdi), %ymm3, %ymm1
-; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
+; AVX1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm3
+; AVX1-NEXT: vblendvpd %ymm0, %ymm3, %ymm1, %ymm0
+; AVX1-NEXT: vmaskmovpd 32(%rdi), %ymm4, %ymm1
+; AVX1-NEXT: vblendvpd %ymm4, %ymm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_v8i64_v8i16:
; AVX2: ## %bb.0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX2-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vpmovsxwd %xmm3, %xmm3
-; AVX2-NEXT: vpmovsxdq %xmm3, %ymm3
-; AVX2-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
-; AVX2-NEXT: vpmovsxwd %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm4, %xmm4
+; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
-; AVX2-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm4
-; AVX2-NEXT: vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
-; AVX2-NEXT: vpmaskmovq 32(%rdi), %ymm3, %ymm1
-; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm3
+; AVX2-NEXT: vblendvpd %ymm0, %ymm3, %ymm1, %ymm0
+; AVX2-NEXT: vpmaskmovq 32(%rdi), %ymm4, %ymm1
+; AVX2-NEXT: vblendvpd %ymm4, %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: load_v8i64_v8i16:
@@ -2054,9 +2061,11 @@ define <8 x i64> @load_v8i64_v8i64(<8 x i64> %trigger, <8 x i64>* %addr, <8 x i6
define <2 x i32> @load_v2i32_v2i32(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
; SSE2-LABEL: load_v2i32_v2i32:
; SSE2: ## %bb.0:
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2]
+; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: movmskpd %xmm0, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: jne LBB17_1
@@ -2064,26 +2073,26 @@ define <2 x i32> @load_v2i32_v2i32(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i3
; SSE2-NEXT: testb $2, %al
; SSE2-NEXT: jne LBB17_3
; SSE2-NEXT: LBB17_4: ## %else2
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
; SSE2-NEXT: LBB17_1: ## %cond.load
-; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT: movl (%rdi), %ecx
+; SSE2-NEXT: movq %rcx, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: testb $2, %al
; SSE2-NEXT: je LBB17_4
; SSE2-NEXT: LBB17_3: ## %cond.load1
-; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movl 4(%rdi), %eax
+; SSE2-NEXT: movq %rax, %xmm0
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: load_v2i32_v2i32:
; SSE42: ## %bb.0:
; SSE42-NEXT: pxor %xmm2, %xmm2
-; SSE42-NEXT: pcmpeqd %xmm0, %xmm2
-; SSE42-NEXT: pmovsxdq %xmm2, %xmm0
+; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; SSE42-NEXT: pcmpeqq %xmm2, %xmm0
; SSE42-NEXT: movmskpd %xmm0, %eax
; SSE42-NEXT: testb $1, %al
; SSE42-NEXT: jne LBB17_1
@@ -2094,59 +2103,62 @@ define <2 x i32> @load_v2i32_v2i32(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i3
; SSE42-NEXT: movdqa %xmm1, %xmm0
; SSE42-NEXT: retq
; SSE42-NEXT: LBB17_1: ## %cond.load
-; SSE42-NEXT: pinsrd $0, (%rdi), %xmm1
+; SSE42-NEXT: movl (%rdi), %ecx
+; SSE42-NEXT: pinsrq $0, %rcx, %xmm1
; SSE42-NEXT: testb $2, %al
; SSE42-NEXT: je LBB17_4
; SSE42-NEXT: LBB17_3: ## %cond.load1
-; SSE42-NEXT: pinsrd $1, 4(%rdi), %xmm1
+; SSE42-NEXT: movl 4(%rdi), %eax
+; SSE42-NEXT: pinsrq $1, %rax, %xmm1
; SSE42-NEXT: movdqa %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: load_v2i32_v2i32:
; AVX1: ## %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; AVX1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_v2i32_v2i32:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; AVX2-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2
+; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT: retq
;
; AVX512F-LABEL: load_v2i32_v2i32:
; AVX512F: ## %bb.0:
-; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; AVX512F-NEXT: kshiftlw $14, %k0, %k0
; AVX512F-NEXT: kshiftrw $14, %k0, %k1
-; AVX512F-NEXT: vpblendmd (%rdi), %zmm1, %zmm0 {%k1}
-; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1}
+; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
-; AVX512VLDQ-LABEL: load_v2i32_v2i32:
-; AVX512VLDQ: ## %bb.0:
-; AVX512VLDQ-NEXT: vptestnmd %xmm0, %xmm0, %k0
-; AVX512VLDQ-NEXT: kshiftlb $6, %k0, %k0
-; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1
-; AVX512VLDQ-NEXT: vpblendmd (%rdi), %xmm1, %xmm0 {%k1}
-; AVX512VLDQ-NEXT: retq
-;
-; AVX512VLBW-LABEL: load_v2i32_v2i32:
-; AVX512VLBW: ## %bb.0:
-; AVX512VLBW-NEXT: vptestnmd %xmm0, %xmm0, %k0
-; AVX512VLBW-NEXT: kshiftlw $14, %k0, %k0
-; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k1
-; AVX512VLBW-NEXT: vpblendmd (%rdi), %xmm1, %xmm0 {%k1}
-; AVX512VLBW-NEXT: retq
+; AVX512VL-LABEL: load_v2i32_v2i32:
+; AVX512VL: ## %bb.0:
+; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k1
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; AVX512VL-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1}
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512VL-NEXT: retq
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
%res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
ret <2 x i32> %res
OpenPOWER on IntegriCloud