author    Craig Topper <craig.topper@intel.com>  2018-03-08 08:02:52 +0000
committer Craig Topper <craig.topper@intel.com>  2018-03-08 08:02:52 +0000
commit    a406796f5f690da895015c4106a9289d88408c93 (patch)
tree      23ec944a7ebfe005996198aa42175b5d88812fae /llvm/test
parent    0feb0b9059792596e8386d030fa3d0aed9527798 (diff)
[X86] Change X86::PMULDQ/PMULUDQ opcodes to take vXi64 type as input instead of vXi32.
These instructions can be thought of as reading either the even elements of a vXi32 input or the lower half of each element of a vXi64 input. We currently use the vXi32 interpretation, but vXi64 matches better with their broadcast behavior in EVEX.

I'm looking at moving MULDQ/MULUDQ creation to a DAG combine so we can do it when AVX512DQ is enabled without having to go through Custom lowering. But in some of the test cases we failed to use a broadcast load due to the size difference. This should help with that.

I'm also wondering if we can model these instructions in native IR and remove the intrinsics, and I think using a vXi64 type will work better with that.

llvm-svn: 326991
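As a rough illustration of that native-IR direction (a sketch only; the function names and exact pattern below are hypothetical, not necessarily the form the backend would match), PMULDQ on a vXi64 input corresponds to sign-extending the low 32 bits of each 64-bit lane and doing a full 64-bit multiply, while PMULUDQ masks the high bits instead of sign-extending:

; Hypothetical sketch: PMULDQ semantics expressed directly on <2 x i64>.
; Each 64-bit lane carries a 32-bit value in its low half; shl/ashr by 32
; sign-extends that low half before the full 64-bit multiply.
define <2 x i64> @pmuldq_as_native_ir(<2 x i64> %a, <2 x i64> %b) {
  %a.shl  = shl  <2 x i64> %a, <i64 32, i64 32>
  %a.sext = ashr <2 x i64> %a.shl, <i64 32, i64 32>
  %b.shl  = shl  <2 x i64> %b, <i64 32, i64 32>
  %b.sext = ashr <2 x i64> %b.shl, <i64 32, i64 32>
  %mul    = mul  <2 x i64> %a.sext, %b.sext
  ret <2 x i64> %mul
}

; PMULUDQ is the unsigned analogue: zero the high 32 bits of each lane
; with a mask instead of sign-extending, then multiply.
define <2 x i64> @pmuludq_as_native_ir(<2 x i64> %a, <2 x i64> %b) {
  %a.lo = and <2 x i64> %a, <i64 4294967295, i64 4294967295>
  %b.lo = and <2 x i64> %b, <i64 4294967295, i64 4294967295>
  %mul  = mul <2 x i64> %a.lo, %b.lo
  ret <2 x i64> %mul
}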
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll  |  60
-rw-r--r--  llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll  |  72
-rw-r--r--  llvm/test/CodeGen/X86/vector-idiv.ll           |  26
-rw-r--r--  llvm/test/CodeGen/X86/vector-mul.ll            |  68
-rw-r--r--  llvm/test/CodeGen/X86/vselect-avx.ll           |  13
5 files changed, 115 insertions(+), 124 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
index 76005a037b9..efe693949ab 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
@@ -104,13 +104,12 @@ define <4 x i32> @test_div7_4i32(<4 x i32> %a) nounwind {
;
; SSE41-LABEL: test_div7_4i32:
; SSE41: # %bb.0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuldq %xmm2, %xmm3
+; SSE41-NEXT: pmuldq %xmm1, %xmm2
; SSE41-NEXT: pmuldq %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE41-NEXT: paddd %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrld $31, %xmm0
@@ -121,13 +120,12 @@ define <4 x i32> @test_div7_4i32(<4 x i32> %a) nounwind {
;
; AVX1-LABEL: test_div7_4i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
+; AVX1-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT: vpsrad $2, %xmm0, %xmm0
@@ -417,31 +415,29 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
;
; SSE41-LABEL: test_rem7_4i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuldq %xmm2, %xmm3
-; SSE41-NEXT: pmuldq %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE41-NEXT: paddd %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm2
-; SSE41-NEXT: psrld $31, %xmm2
-; SSE41-NEXT: psrad $2, %xmm1
-; SSE41-NEXT: paddd %xmm2, %xmm1
-; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
-; SSE41-NEXT: psubd %xmm1, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
+; SSE41-NEXT: pmuldq %xmm2, %xmm1
+; SSE41-NEXT: pmuldq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT: paddd %xmm0, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm1
+; SSE41-NEXT: psrld $31, %xmm1
+; SSE41-NEXT: psrad $2, %xmm2
+; SSE41-NEXT: paddd %xmm1, %xmm2
+; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
+; SSE41-NEXT: psubd %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_rem7_4i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
+; AVX1-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm1
; AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
; AVX1-NEXT: vpsrad $2, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
index c85128893da..a4194464c32 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
@@ -88,7 +88,6 @@ define <4 x i32> @test_div7_4i32(<4 x i32> %a) nounwind {
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pmuludq %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
@@ -101,28 +100,26 @@ define <4 x i32> @test_div7_4i32(<4 x i32> %a) nounwind {
;
; SSE41-LABEL: test_div7_4i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuludq %xmm2, %xmm3
-; SSE41-NEXT: pmuludq %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE41-NEXT: psubd %xmm1, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; SSE41-NEXT: pmuludq %xmm2, %xmm1
+; SSE41-NEXT: pmuludq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT: psubd %xmm2, %xmm0
; SSE41-NEXT: psrld $1, %xmm0
-; SSE41-NEXT: paddd %xmm1, %xmm0
+; SSE41-NEXT: paddd %xmm2, %xmm0
; SSE41-NEXT: psrld $2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_div7_4i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
@@ -371,7 +368,6 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pmuludq %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
@@ -393,31 +389,29 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
;
; SSE41-LABEL: test_rem7_4i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuludq %xmm2, %xmm3
-; SSE41-NEXT: pmuludq %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psubd %xmm1, %xmm2
-; SSE41-NEXT: psrld $1, %xmm2
-; SSE41-NEXT: paddd %xmm1, %xmm2
-; SSE41-NEXT: psrld $2, %xmm2
-; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
-; SSE41-NEXT: psubd %xmm2, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; SSE41-NEXT: pmuludq %xmm2, %xmm1
+; SSE41-NEXT: pmuludq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psubd %xmm2, %xmm1
+; SSE41-NEXT: psrld $1, %xmm1
+; SSE41-NEXT: paddd %xmm2, %xmm1
+; SSE41-NEXT: psrld $2, %xmm1
+; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
+; SSE41-NEXT: psubd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_rem7_4i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpsrld $1, %xmm2, %xmm2
; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-idiv.ll b/llvm/test/CodeGen/X86/vector-idiv.ll
index e2f769761e1..205cb2d5e9c 100644
--- a/llvm/test/CodeGen/X86/vector-idiv.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv.ll
@@ -46,27 +46,25 @@ define <4 x i32> @PR20355(<4 x i32> %a) nounwind {
;
; SSE41-LABEL: PR20355:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766]
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuldq %xmm2, %xmm3
-; SSE41-NEXT: pmuldq %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1431655766,1431655766,1431655766,1431655766]
+; SSE41-NEXT: pmuldq %xmm2, %xmm1
+; SSE41-NEXT: pmuldq %xmm2, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: psrld $31, %xmm0
-; SSE41-NEXT: paddd %xmm1, %xmm0
+; SSE41-NEXT: paddd %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: PR20355:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1431655766,1431655766,1431655766,1431655766]
+; AVX1-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll
index 811084ea892..9c4112639de 100644
--- a/llvm/test/CodeGen/X86/vector-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-mul.ll
@@ -695,16 +695,17 @@ define <2 x i64> @mul_v2i64_15_63(<2 x i64> %a0) nounwind {
define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_neg_15_63:
; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm1 = [4294967281,4294967295,4294967233,4294967295]
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pmuludq %xmm1, %xmm2
-; X86-NEXT: movdqa %xmm0, %xmm3
+; X86-NEXT: movdqa %xmm0, %xmm1
+; X86-NEXT: psrlq $32, %xmm1
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [4294967281,4294967295,4294967233,4294967295]
+; X86-NEXT: pmuludq %xmm2, %xmm1
+; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: psrlq $32, %xmm3
-; X86-NEXT: pmuludq %xmm1, %xmm3
-; X86-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
+; X86-NEXT: pmuludq %xmm0, %xmm3
+; X86-NEXT: paddq %xmm1, %xmm3
+; X86-NEXT: psllq $32, %xmm3
+; X86-NEXT: pmuludq %xmm2, %xmm0
; X86-NEXT: paddq %xmm3, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_neg_15_63:
@@ -739,16 +740,17 @@ define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_neg_17_65:
; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm1 = [4294967279,4294967295,4294967231,4294967295]
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pmuludq %xmm1, %xmm2
-; X86-NEXT: movdqa %xmm0, %xmm3
+; X86-NEXT: movdqa %xmm0, %xmm1
+; X86-NEXT: psrlq $32, %xmm1
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [4294967279,4294967295,4294967231,4294967295]
+; X86-NEXT: pmuludq %xmm2, %xmm1
+; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: psrlq $32, %xmm3
-; X86-NEXT: pmuludq %xmm1, %xmm3
-; X86-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
+; X86-NEXT: pmuludq %xmm0, %xmm3
+; X86-NEXT: paddq %xmm1, %xmm3
+; X86-NEXT: psllq $32, %xmm3
+; X86-NEXT: pmuludq %xmm2, %xmm0
; X86-NEXT: paddq %xmm3, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_neg_17_65:
@@ -823,16 +825,17 @@ define <2 x i64> @mul_v2i64_0_1(<2 x i64> %a0) nounwind {
define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_neg_0_1:
; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm1 = [0,0,4294967295,4294967295]
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pmuludq %xmm1, %xmm2
-; X86-NEXT: movdqa %xmm0, %xmm3
+; X86-NEXT: movdqa %xmm0, %xmm1
+; X86-NEXT: psrlq $32, %xmm1
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [0,0,4294967295,4294967295]
+; X86-NEXT: pmuludq %xmm2, %xmm1
+; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: psrlq $32, %xmm3
-; X86-NEXT: pmuludq %xmm1, %xmm3
-; X86-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
+; X86-NEXT: pmuludq %xmm0, %xmm3
+; X86-NEXT: paddq %xmm1, %xmm3
+; X86-NEXT: psllq $32, %xmm3
+; X86-NEXT: pmuludq %xmm2, %xmm0
; X86-NEXT: paddq %xmm3, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_neg_0_1:
@@ -876,16 +879,17 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_15_neg_63:
; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm1 = [15,0,4294967233,4294967295]
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pmuludq %xmm1, %xmm2
-; X86-NEXT: movdqa %xmm0, %xmm3
+; X86-NEXT: movdqa %xmm0, %xmm1
+; X86-NEXT: psrlq $32, %xmm1
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [15,0,4294967233,4294967295]
+; X86-NEXT: pmuludq %xmm2, %xmm1
+; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: psrlq $32, %xmm3
-; X86-NEXT: pmuludq %xmm1, %xmm3
-; X86-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
+; X86-NEXT: pmuludq %xmm0, %xmm3
+; X86-NEXT: paddq %xmm1, %xmm3
+; X86-NEXT: psllq $32, %xmm3
+; X86-NEXT: pmuludq %xmm2, %xmm0
; X86-NEXT: paddq %xmm3, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_15_neg_63:
diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll
index 9c2ae113c14..be23d4b4195 100644
--- a/llvm/test/CodeGen/X86/vselect-avx.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx.ll
@@ -85,13 +85,12 @@ bb:
define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17, <4 x i16> %tmp3, <4 x i16> %tmp12) {
; AVX1-LABEL: test3:
; AVX1: ## %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1431655766,1431655766,1431655766,1431655766]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm4, %xmm5, %xmm4
-; AVX1-NEXT: vpmuldq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1431655766,1431655766,1431655766,1431655766]
+; AVX1-NEXT: vpmuldq %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpmuldq %xmm4, %xmm0, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
; AVX1-NEXT: vpsrld $31, %xmm3, %xmm4
; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm3, %xmm3