| author | Craig Topper <craig.topper@intel.com> | 2018-03-08 08:02:52 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-03-08 08:02:52 +0000 |
| commit | a406796f5f690da895015c4106a9289d88408c93 (patch) | |
| tree | 23ec944a7ebfe005996198aa42175b5d88812fae /llvm/test | |
| parent | 0feb0b9059792596e8386d030fa3d0aed9527798 (diff) | |
| download | bcm5719-llvm-a406796f5f690da895015c4106a9289d88408c93.tar.gz bcm5719-llvm-a406796f5f690da895015c4106a9289d88408c93.zip | |
[X86] Change X86::PMULDQ/PMULUDQ opcodes to take vXi64 type as input instead of vXi32.
These instructions can be thought of as reading either the even elements of a vXi32 input or the lower half of each element of a vXi64 input. We currently use the vXi32 interpretation, but the vXi64 interpretation matches the EVEX broadcast behavior better.
I'm looking at moving MULDQ/MULUDQ creation to a DAG combine so we can do it when AVX512DQ is enabled without having to go through Custom lowering. But in some of the test cases we failed to use a broadcast load due to the size difference. This should help with that.
I'm also wondering if we can model these instructions in native IR and remove the intrinsics; I think using a vXi64 type will work better for that.
llvm-svn: 326991
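For context, here is a minimal sketch of the "native IR" form the commit message alludes to. It is not taken from this commit and the function name is made up; it only illustrates the semantics: PMULUDQ multiplies the low 32 bits of each 64-bit lane as an unsigned 32x32->64 product, which plain IR can express as masking each i64 lane and then doing a full 64-bit multiply.

```llvm
define <2 x i64> @pmuludq_native_ir(<2 x i64> %a, <2 x i64> %b) {
  ; Keep only the low 32 bits of every i64 lane.
  %a.lo = and <2 x i64> %a, <i64 4294967295, i64 4294967295>
  %b.lo = and <2 x i64> %b, <i64 4294967295, i64 4294967295>
  ; Per-lane 32x32->64 unsigned multiply -- the operation PMULUDQ performs.
  %prod = mul <2 x i64> %a.lo, %b.lo
  ret <2 x i64> %prod
}
```

The signed PMULDQ case would instead sign-extend the low 32 bits of each lane (shift left by 32, then arithmetic shift right by 32) before the multiply.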
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll | 60 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll | 72 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-idiv.ll | 26 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-mul.ll | 68 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vselect-avx.ll | 13 |
5 files changed, 115 insertions, 124 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
index 76005a037b9..efe693949ab 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
@@ -104,13 +104,12 @@ define <4 x i32> @test_div7_4i32(<4 x i32> %a) nounwind {
 ;
 ; SSE41-LABEL: test_div7_4i32:
 ; SSE41: # %bb.0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuldq %xmm2, %xmm3
+; SSE41-NEXT: pmuldq %xmm1, %xmm2
 ; SSE41-NEXT: pmuldq %xmm0, %xmm1
 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
 ; SSE41-NEXT: paddd %xmm0, %xmm1
 ; SSE41-NEXT: movdqa %xmm1, %xmm0
 ; SSE41-NEXT: psrld $31, %xmm0
@@ -121,13 +120,12 @@ define <4 x i32> @test_div7_4i32(<4 x i32> %a) nounwind {
 ;
 ; AVX1-LABEL: test_div7_4i32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
+; AVX1-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
 ; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
 ; AVX1-NEXT: vpsrad $2, %xmm0, %xmm0
@@ -417,31 +415,29 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
 ;
 ; SSE41-LABEL: test_rem7_4i32:
 ; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuldq %xmm2, %xmm3
-; SSE41-NEXT: pmuldq %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE41-NEXT: paddd %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm2
-; SSE41-NEXT: psrld $31, %xmm2
-; SSE41-NEXT: psrad $2, %xmm1
-; SSE41-NEXT: paddd %xmm2, %xmm1
-; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
-; SSE41-NEXT: psubd %xmm1, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
+; SSE41-NEXT: pmuldq %xmm2, %xmm1
+; SSE41-NEXT: pmuldq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT: paddd %xmm0, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm1
+; SSE41-NEXT: psrld $31, %xmm1
+; SSE41-NEXT: psrad $2, %xmm2
+; SSE41-NEXT: paddd %xmm1, %xmm2
+; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
+; SSE41-NEXT: psubd %xmm2, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; AVX1-LABEL: test_rem7_4i32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
+; AVX1-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
 ; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm1
 ; AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
 ; AVX1-NEXT: vpsrad $2, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
index c85128893da..a4194464c32 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
@@ -88,7 +88,6 @@ define <4 x i32> @test_div7_4i32(<4 x i32> %a) nounwind {
 ; SSE2-NEXT: movdqa %xmm0, %xmm2
 ; SSE2-NEXT: pmuludq %xmm1, %xmm2
 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
 ; SSE2-NEXT: pmuludq %xmm1, %xmm3
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
@@ -101,28 +100,26 @@ define <4 x i32> @test_div7_4i32(<4 x i32> %a) nounwind {
 ;
 ; SSE41-LABEL: test_div7_4i32:
 ; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuludq %xmm2, %xmm3
-; SSE41-NEXT: pmuludq %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE41-NEXT: psubd %xmm1, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; SSE41-NEXT: pmuludq %xmm2, %xmm1
+; SSE41-NEXT: pmuludq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT: psubd %xmm2, %xmm0
 ; SSE41-NEXT: psrld $1, %xmm0
-; SSE41-NEXT: paddd %xmm1, %xmm0
+; SSE41-NEXT: paddd %xmm2, %xmm0
 ; SSE41-NEXT: psrld $2, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; AVX1-LABEL: test_div7_4i32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
 ; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
@@ -371,7 +368,6 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
 ; SSE2-NEXT: movdqa %xmm0, %xmm2
 ; SSE2-NEXT: pmuludq %xmm1, %xmm2
 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
 ; SSE2-NEXT: pmuludq %xmm1, %xmm3
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
@@ -393,31 +389,29 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
 ;
 ; SSE41-LABEL: test_rem7_4i32:
 ; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuludq %xmm2, %xmm3
-; SSE41-NEXT: pmuludq %xmm0, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psubd %xmm1, %xmm2
-; SSE41-NEXT: psrld $1, %xmm2
-; SSE41-NEXT: paddd %xmm1, %xmm2
-; SSE41-NEXT: psrld $2, %xmm2
-; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
-; SSE41-NEXT: psubd %xmm2, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; SSE41-NEXT: pmuludq %xmm2, %xmm1
+; SSE41-NEXT: pmuludq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psubd %xmm2, %xmm1
+; SSE41-NEXT: psrld $1, %xmm1
+; SSE41-NEXT: paddd %xmm2, %xmm1
+; SSE41-NEXT: psrld $2, %xmm1
+; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
+; SSE41-NEXT: psubd %xmm1, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; AVX1-LABEL: test_rem7_4i32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
 ; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm2
 ; AVX1-NEXT: vpsrld $1, %xmm2, %xmm2
 ; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-idiv.ll b/llvm/test/CodeGen/X86/vector-idiv.ll
index e2f769761e1..205cb2d5e9c 100644
--- a/llvm/test/CodeGen/X86/vector-idiv.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv.ll
@@ -46,27 +46,25 @@ define <4 x i32> @PR20355(<4 x i32> %a) nounwind {
 ;
 ; SSE41-LABEL: PR20355:
 ; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766]
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuldq %xmm2, %xmm3
-; SSE41-NEXT: pmuldq %xmm1, %xmm0
 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1431655766,1431655766,1431655766,1431655766]
+; SSE41-NEXT: pmuldq %xmm2, %xmm1
+; SSE41-NEXT: pmuldq %xmm2, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT: movdqa %xmm2, %xmm0
 ; SSE41-NEXT: psrld $31, %xmm0
-; SSE41-NEXT: paddd %xmm1, %xmm0
+; SSE41-NEXT: paddd %xmm2, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; AVX1-LABEL: PR20355:
 ; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1431655766,1431655766,1431655766,1431655766]
+; AVX1-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll
index 811084ea892..9c4112639de 100644
--- a/llvm/test/CodeGen/X86/vector-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-mul.ll
@@ -695,16 +695,17 @@ define <2 x i64> @mul_v2i64_15_63(<2 x i64> %a0) nounwind {
 define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
 ; X86-LABEL: mul_v2i64_neg_15_63:
 ; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm1 = [4294967281,4294967295,4294967233,4294967295]
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pmuludq %xmm1, %xmm2
-; X86-NEXT: movdqa %xmm0, %xmm3
+; X86-NEXT: movdqa %xmm0, %xmm1
+; X86-NEXT: psrlq $32, %xmm1
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [4294967281,4294967295,4294967233,4294967295]
+; X86-NEXT: pmuludq %xmm2, %xmm1
+; X86-NEXT: movdqa %xmm2, %xmm3
 ; X86-NEXT: psrlq $32, %xmm3
-; X86-NEXT: pmuludq %xmm1, %xmm3
-; X86-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
+; X86-NEXT: pmuludq %xmm0, %xmm3
+; X86-NEXT: paddq %xmm1, %xmm3
+; X86-NEXT: psllq $32, %xmm3
+; X86-NEXT: pmuludq %xmm2, %xmm0
 ; X86-NEXT: paddq %xmm3, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: mul_v2i64_neg_15_63:
@@ -739,16 +740,17 @@ define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
 define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind {
 ; X86-LABEL: mul_v2i64_neg_17_65:
 ; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm1 = [4294967279,4294967295,4294967231,4294967295]
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pmuludq %xmm1, %xmm2
-; X86-NEXT: movdqa %xmm0, %xmm3
+; X86-NEXT: movdqa %xmm0, %xmm1
+; X86-NEXT: psrlq $32, %xmm1
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [4294967279,4294967295,4294967231,4294967295]
+; X86-NEXT: pmuludq %xmm2, %xmm1
+; X86-NEXT: movdqa %xmm2, %xmm3
 ; X86-NEXT: psrlq $32, %xmm3
-; X86-NEXT: pmuludq %xmm1, %xmm3
-; X86-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
+; X86-NEXT: pmuludq %xmm0, %xmm3
+; X86-NEXT: paddq %xmm1, %xmm3
+; X86-NEXT: psllq $32, %xmm3
+; X86-NEXT: pmuludq %xmm2, %xmm0
 ; X86-NEXT: paddq %xmm3, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: mul_v2i64_neg_17_65:
@@ -823,16 +825,17 @@ define <2 x i64> @mul_v2i64_0_1(<2 x i64> %a0) nounwind {
 define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
 ; X86-LABEL: mul_v2i64_neg_0_1:
 ; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm1 = [0,0,4294967295,4294967295]
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pmuludq %xmm1, %xmm2
-; X86-NEXT: movdqa %xmm0, %xmm3
+; X86-NEXT: movdqa %xmm0, %xmm1
+; X86-NEXT: psrlq $32, %xmm1
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [0,0,4294967295,4294967295]
+; X86-NEXT: pmuludq %xmm2, %xmm1
+; X86-NEXT: movdqa %xmm2, %xmm3
 ; X86-NEXT: psrlq $32, %xmm3
-; X86-NEXT: pmuludq %xmm1, %xmm3
-; X86-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
+; X86-NEXT: pmuludq %xmm0, %xmm3
+; X86-NEXT: paddq %xmm1, %xmm3
+; X86-NEXT: psllq $32, %xmm3
+; X86-NEXT: pmuludq %xmm2, %xmm0
 ; X86-NEXT: paddq %xmm3, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: mul_v2i64_neg_0_1:
@@ -876,16 +879,17 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
 define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
 ; X86-LABEL: mul_v2i64_15_neg_63:
 ; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm1 = [15,0,4294967233,4294967295]
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pmuludq %xmm1, %xmm2
-; X86-NEXT: movdqa %xmm0, %xmm3
+; X86-NEXT: movdqa %xmm0, %xmm1
+; X86-NEXT: psrlq $32, %xmm1
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [15,0,4294967233,4294967295]
+; X86-NEXT: pmuludq %xmm2, %xmm1
+; X86-NEXT: movdqa %xmm2, %xmm3
 ; X86-NEXT: psrlq $32, %xmm3
-; X86-NEXT: pmuludq %xmm1, %xmm3
-; X86-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
+; X86-NEXT: pmuludq %xmm0, %xmm3
+; X86-NEXT: paddq %xmm1, %xmm3
+; X86-NEXT: psllq $32, %xmm3
+; X86-NEXT: pmuludq %xmm2, %xmm0
 ; X86-NEXT: paddq %xmm3, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: mul_v2i64_15_neg_63:
diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll
index 9c2ae113c14..be23d4b4195 100644
--- a/llvm/test/CodeGen/X86/vselect-avx.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx.ll
@@ -85,13 +85,12 @@ bb:
 define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17, <4 x i16> %tmp3, <4 x i16> %tmp12) {
 ; AVX1-LABEL: test3:
 ; AVX1: ## %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1431655766,1431655766,1431655766,1431655766]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm4, %xmm5, %xmm4
-; AVX1-NEXT: vpmuldq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1431655766,1431655766,1431655766,1431655766]
+; AVX1-NEXT: vpmuldq %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpmuldq %xmm4, %xmm0, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
 ; AVX1-NEXT: vpsrld $31, %xmm3, %xmm4
 ; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm3, %xmm3
```

