From 6b988ad8f2285e8bb3c56db38c0416f6186d3889 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 2 Dec 2014 22:31:23 +0000
Subject: [X86][SSE] Keep 4i32 vector insertions in integer domain on SSE4.1
 targets

4i32 shuffles for single insertions into zero vectors lowers to X86vzmovl which was using (v)blendps - causing domain switch stalls. This patch fixes this by using (v)pblendw instead.

The updated tests on test/CodeGen/X86/sse41.ll still contain a domain stall due to the use of insertps - I'm looking at fixing this in a future patch.

Differential Revision: http://reviews.llvm.org/D6458

llvm-svn: 223165
---
 llvm/test/CodeGen/X86/combine-or.ll | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'llvm/test/CodeGen/X86/combine-or.ll')
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index f10092a1378..c70067f389a 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -223,10 +223,10 @@ define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
 define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test18:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    xorps %xmm2, %xmm2
-; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; CHECK-NEXT:    pxor %xmm2, %xmm2
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; CHECK-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; CHECK-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
 ; CHECK-NEXT:    por %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
-- 
cgit v1.2.3