summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/combine-or.ll
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2014-11-04 23:25:08 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2014-11-04 23:25:08 +0000
commitc9a0779309ebf8240dcea044fafbf072c1163095 (patch)
treeb6f9a9917a3ca17f506944e6b0c57afd695a7e97 /llvm/test/CodeGen/X86/combine-or.ll
parentde4277a076a22e396bf80f0974844af558deb8aa (diff)
downloadbcm5719-llvm-c9a0779309ebf8240dcea044fafbf072c1163095.tar.gz
bcm5719-llvm-c9a0779309ebf8240dcea044fafbf072c1163095.zip
[X86][SSE] Enable commutation for SSE immediate blend instructions
Patch to allow (v)blendps, (v)blendpd, (v)pblendw and vpblendd instructions to be commuted - swaps the src registers and inverts the blend mask. This is primarily to improve memory folding (see new tests), but it also improves the quality of shuffles (see modified tests). Differential Revision: http://reviews.llvm.org/D6015 llvm-svn: 221313
Diffstat (limited to 'llvm/test/CodeGen/X86/combine-or.ll')
-rw-r--r--llvm/test/CodeGen/X86/combine-or.ll31
1 files changed, 11 insertions, 20 deletions
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index ba9d48d6047..9539eae9d6a 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -19,8 +19,7 @@ define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
@@ -32,8 +31,7 @@ define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test3:
; CHECK: # BB#0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
@@ -45,8 +43,7 @@ define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test4:
; CHECK: # BB#0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
@@ -106,8 +103,7 @@ define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test9:
; CHECK: # BB#0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
%and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
@@ -119,8 +115,7 @@ define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test10:
; CHECK: # BB#0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
%and1 = and <2 x i64> %a, <i64 0, i64 -1>
%and2 = and <2 x i64> %b, <i64 -1, i64 0>
@@ -132,8 +127,7 @@ define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test11:
; CHECK: # BB#0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
%and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
@@ -230,12 +224,10 @@ define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test18:
; CHECK: # BB#0:
; CHECK-NEXT: xorps %xmm2, %xmm2
-; CHECK-NEXT: xorps %xmm3, %xmm3
-; CHECK-NEXT: blendps {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,0,1,1]
-; CHECK-NEXT: blendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; CHECK-NEXT: orps %xmm0, %xmm2
-; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; CHECK-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
@@ -295,8 +287,7 @@ define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_crash:
; CHECK: # BB#0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
OpenPOWER on IntegriCloud