From 6b988ad8f2285e8bb3c56db38c0416f6186d3889 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 2 Dec 2014 22:31:23 +0000 Subject: [X86][SSE] Keep 4i32 vector insertions in integer domain on SSE4.1 targets 4i32 shuffles for single insertions into zero vectors lowers to X86vzmovl which was using (v)blendps - causing domain switch stalls. This patch fixes this by using (v)pblendw instead. The updated tests on test/CodeGen/X86/sse41.ll still contain a domain stall due to the use of insertps - I'm looking at fixing this in a future patch. Differential Revision: http://reviews.llvm.org/D6458 llvm-svn: 223165 --- llvm/test/CodeGen/X86/combine-or.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'llvm/test/CodeGen/X86/combine-or.ll') diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll index f10092a1378..c70067f389a 100644 --- a/llvm/test/CodeGen/X86/combine-or.ll +++ b/llvm/test/CodeGen/X86/combine-or.ll @@ -223,10 +223,10 @@ define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test18: ; CHECK: # BB#0: -; CHECK-NEXT: xorps %xmm2, %xmm2 -; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] +; CHECK-NEXT: pxor %xmm2, %xmm2 +; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7] ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] -; CHECK-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] +; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] ; CHECK-NEXT: por %xmm1, %xmm0 ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> -- cgit v1.2.3