diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/masked_store.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/masked_store.ll | 114 |
1 files changed, 59 insertions, 55 deletions
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll index 1a6be01d804..422eb1f3e48 100644 --- a/llvm/test/CodeGen/X86/masked_store.ll +++ b/llvm/test/CodeGen/X86/masked_store.ll @@ -165,9 +165,11 @@ define void @store_v4f64_v4i64(<4 x i64> %trigger, <4 x double>* %addr, <4 x dou define void @store_v2f32_v2i32(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) { ; SSE2-LABEL: store_v2f32_v2i32: ; SSE2: ## %bb.0: +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2] +; SSE2-NEXT: pand %xmm2, %xmm0 ; SSE2-NEXT: movmskpd %xmm0, %eax ; SSE2-NEXT: testb $1, %al ; SSE2-NEXT: jne LBB3_1 @@ -188,8 +190,8 @@ define void @store_v2f32_v2i32(<2 x i32> %trigger, <2 x float>* %addr, <2 x floa ; SSE4-LABEL: store_v2f32_v2i32: ; SSE4: ## %bb.0: ; SSE4-NEXT: pxor %xmm2, %xmm2 -; SSE4-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE4-NEXT: pmovsxdq %xmm2, %xmm0 +; SSE4-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] +; SSE4-NEXT: pcmpeqq %xmm2, %xmm0 ; SSE4-NEXT: movmskpd %xmm0, %eax ; SSE4-NEXT: testb $1, %al ; SSE4-NEXT: jne LBB3_1 @@ -206,40 +208,43 @@ define void @store_v2f32_v2i32(<2 x i32> %trigger, <2 x float>* %addr, <2 x floa ; SSE4-NEXT: extractps $1, %xmm1, 4(%rdi) ; SSE4-NEXT: retq ; -; AVX1OR2-LABEL: store_v2f32_v2i32: -; AVX1OR2: ## %bb.0: -; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1OR2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX1OR2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero -; AVX1OR2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) -; AVX1OR2-NEXT: retq +; AVX1-LABEL: store_v2f32_v2i32: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] +; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero +; AVX1-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: store_v2f32_v2i32: +; AVX2: ## %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero +; AVX2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: store_v2f32_v2i32: ; AVX512F: ## %bb.0: ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kshiftlw $14, %k0, %k0 ; AVX512F-NEXT: kshiftrw $14, %k0, %k1 ; AVX512F-NEXT: vmovups %zmm1, (%rdi) {%k1} ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; -; AVX512VLDQ-LABEL: store_v2f32_v2i32: -; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vptestnmd %xmm0, %xmm0, %k0 -; AVX512VLDQ-NEXT: kshiftlb $6, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1 -; AVX512VLDQ-NEXT: vmovups %xmm1, (%rdi) {%k1} -; AVX512VLDQ-NEXT: retq -; -; AVX512VLBW-LABEL: store_v2f32_v2i32: -; AVX512VLBW: ## %bb.0: -; AVX512VLBW-NEXT: vptestnmd %xmm0, %xmm0, %k0 -; AVX512VLBW-NEXT: kshiftlw $14, %k0, %k0 -; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k1 -; AVX512VLBW-NEXT: vmovups %xmm1, (%rdi) {%k1} -; AVX512VLBW-NEXT: retq +; AVX512VL-LABEL: store_v2f32_v2i32: +; AVX512VL: ## %bb.0: +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vmovups %xmm1, (%rdi) {%k1} +; AVX512VL-NEXT: retq %mask = icmp eq <2 x i32> %trigger, zeroinitializer call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) ret void @@ -1041,9 +1046,11 @@ define void @store_v1i32_v1i32(<1 x i32> %trigger, <1 x i32>* %addr, <1 x i32> % define void @store_v2i32_v2i32(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) { ; SSE2-LABEL: store_v2i32_v2i32: ; SSE2: ## %bb.0: +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2] +; SSE2-NEXT: pand %xmm2, %xmm0 ; SSE2-NEXT: movmskpd %xmm0, %eax ; SSE2-NEXT: testb $1, %al ; SSE2-NEXT: jne LBB10_1 @@ -1057,15 +1064,15 @@ define void @store_v2i32_v2i32(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> % ; SSE2-NEXT: testb $2, %al ; SSE2-NEXT: je LBB10_4 ; SSE2-NEXT: LBB10_3: ## %cond.store1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] ; SSE2-NEXT: movd %xmm0, 4(%rdi) ; SSE2-NEXT: retq ; ; SSE4-LABEL: store_v2i32_v2i32: ; SSE4: ## %bb.0: ; SSE4-NEXT: pxor %xmm2, %xmm2 -; SSE4-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE4-NEXT: pmovsxdq %xmm2, %xmm0 +; SSE4-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] +; SSE4-NEXT: pcmpeqq %xmm2, %xmm0 ; SSE4-NEXT: movmskpd %xmm0, %eax ; SSE4-NEXT: testb $1, %al ; SSE4-NEXT: jne LBB10_1 @@ -1079,51 +1086,48 @@ define void @store_v2i32_v2i32(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> % ; SSE4-NEXT: testb $2, %al ; SSE4-NEXT: je LBB10_4 ; SSE4-NEXT: LBB10_3: ## %cond.store1 -; SSE4-NEXT: extractps $1, %xmm1, 4(%rdi) +; SSE4-NEXT: extractps $2, %xmm1, 4(%rdi) ; SSE4-NEXT: retq ; ; AVX1-LABEL: store_v2i32_v2i32: ; AVX1: ## %bb.0: ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] +; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero +; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] ; AVX1-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) ; AVX1-NEXT: retq ; ; AVX2-LABEL: store_v2i32_v2i32: ; AVX2: ## %bb.0: ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero +; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; AVX2-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) ; AVX2-NEXT: retq ; ; AVX512F-LABEL: store_v2i32_v2i32: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] ; AVX512F-NEXT: kshiftlw $14, %k0, %k0 ; AVX512F-NEXT: kshiftrw $14, %k0, %k1 -; AVX512F-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1} +; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; -; AVX512VLDQ-LABEL: store_v2i32_v2i32: -; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vptestnmd %xmm0, %xmm0, %k0 -; AVX512VLDQ-NEXT: kshiftlb $6, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1 -; AVX512VLDQ-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1} -; AVX512VLDQ-NEXT: retq -; -; AVX512VLBW-LABEL: store_v2i32_v2i32: -; AVX512VLBW: ## %bb.0: -; AVX512VLBW-NEXT: vptestnmd %xmm0, %xmm0, %k0 -; AVX512VLBW-NEXT: kshiftlw $14, %k0, %k0 -; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k1 -; AVX512VLBW-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1} -; AVX512VLBW-NEXT: retq +; AVX512VL-LABEL: store_v2i32_v2i32: +; AVX512VL: ## %bb.0: +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k1 +; AVX512VL-NEXT: vpmovqd %xmm1, (%rdi) {%k1} +; AVX512VL-NEXT: retq %mask = icmp eq <2 x i32> %trigger, zeroinitializer call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) ret void |