author     Simon Pilgrim <llvm-dev@redking.me.uk>  2018-10-24 19:11:28 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>  2018-10-24 19:11:28 +0000
commit     c5bb362b1328dbce6cba55452bce12eed8f72663 (patch)
tree       07981dae1d753db03f2ab5a133251b9458f21735 /llvm/test/CodeGen/X86
parent     06570954e2cd955f3c7c246131ca350ade78d754 (diff)
[X86][SSE] Add SimplifyDemandedBitsForTargetNode PMULDQ/PMULUDQ handling
Add X86 SimplifyDemandedBitsForTargetNode and use it to simplify PMULDQ/PMULUDQ target nodes.

This enables us to repeatedly simplify the node's arguments after the previous approach had to be reverted due to PR39398.

Differential Revision: https://reviews.llvm.org/D53643

llvm-svn: 345182
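Background for the test changes below: PMULDQ/PMULUDQ only read the low 32 bits of each 64-bit source element, so a demanded-bits query can strip away operations that merely define the upper halves, such as the vpxor + vpblendd zeroing removed in these checks. The following standalone C++ program is a minimal illustration of that fact, not part of this commit; it uses the _mm_mul_epu32 intrinsic (which maps to PMULUDQ), and all variable names and constants are made up for the demo.

#include <emmintrin.h>   // SSE2: _mm_mul_epu32 corresponds to PMULUDQ
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Put arbitrary "garbage" in the odd 32-bit elements (the high half of
  // each 64-bit lane); the even elements hold the values being multiplied.
  __m128i a = _mm_set_epi32(0x0BADF00D, 7, 0x0DEFACED, 3);
  __m128i b = _mm_set_epi32(0x0FACE0FF, 11, 0x0C0FFEE0, 5);

  // Explicitly zero the odd elements first - this mirrors what the removed
  // vpxor + vpblendd sequence (the shufflevector with zeroinitializer in
  // the .ll tests) used to do before the multiply.
  __m128i keep_lo  = _mm_set_epi32(0, -1, 0, -1);
  __m128i a_zeroed = _mm_and_si128(a, keep_lo);
  __m128i b_zeroed = _mm_and_si128(b, keep_lo);

  // PMULUDQ only demands the low 32 bits of each 64-bit lane, so both
  // multiplies produce identical results.
  __m128i raw    = _mm_mul_epu32(a, b);
  __m128i masked = _mm_mul_epu32(a_zeroed, b_zeroed);

  uint64_t r[2], m[2];
  _mm_storeu_si128(reinterpret_cast<__m128i *>(r), raw);
  _mm_storeu_si128(reinterpret_cast<__m128i *>(m), masked);
  assert(r[0] == m[0] && r[1] == m[1]);
  std::printf("%llu %llu\n", (unsigned long long)r[0], (unsigned long long)r[1]);
  return 0;
}

The new SimplifyDemandedBitsForTargetNode hook lets the DAG combiner apply this same reasoning to the PMULDQ/PMULUDQ operands, which is why the AVX2/AVX512VL/AVX512DQVL check lines in combine-pmuldq.ll collapse to a plain vpmuludq.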
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--  llvm/test/CodeGen/X86/combine-pmuldq.ll           | 30
-rw-r--r--  llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll  | 68
2 files changed, 38 insertions(+), 60 deletions(-)
diff --git a/llvm/test/CodeGen/X86/combine-pmuldq.ll b/llvm/test/CodeGen/X86/combine-pmuldq.ll
index edc6cb01d97..cd58947b186 100644
--- a/llvm/test/CodeGen/X86/combine-pmuldq.ll
+++ b/llvm/test/CodeGen/X86/combine-pmuldq.ll
@@ -47,26 +47,10 @@ define <2 x i64> @combine_shuffle_zero_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-NEXT: pmuludq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX2-LABEL: combine_shuffle_zero_pmuludq:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: combine_shuffle_zero_pmuludq:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; AVX512DQVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX-LABEL: combine_shuffle_zero_pmuludq:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
%2 = shufflevector <4 x i32> %a1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
%3 = bitcast <4 x i32> %1 to <2 x i64>
@@ -84,22 +68,16 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1)
;
; AVX2-LABEL: combine_shuffle_zero_pmuludq_256:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: combine_shuffle_zero_pmuludq_256:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX512VL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq_256:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512DQVL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX512DQVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: retq
%1 = shufflevector <8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
index 9f339a8a555..82385386c88 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
@@ -143,31 +143,31 @@ define <4 x i32> @test_urem_odd_div(<4 x i32> %X) nounwind readnone {
define <4 x i32> @test_urem_even_div(<4 x i32> %X) nounwind readnone {
; CHECK-SSE2-LABEL: test_urem_even_div:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,2,0]
-; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,3435973837,2863311531,2454267027]
-; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,2454267027]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
; CHECK-SSE2-NEXT: psrld $1, %xmm3
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm3[3,3]
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm2
-; CHECK-SSE2-NEXT: psrld $2, %xmm2
-; CHECK-SSE2-NEXT: psrld $3, %xmm1
-; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm3
-; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm2[3,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
+; CHECK-SSE2-NEXT: psrld $2, %xmm1
+; CHECK-SSE2-NEXT: psrld $3, %xmm2
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [6,10,12,14]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm5
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3]
-; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm1[1,2]
-; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,3,1]
-; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm2[1,2]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
+; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
@@ -377,30 +377,30 @@ define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind readnone {
define <4 x i32> @test_urem_one(<4 x i32> %X) nounwind readnone {
; CHECK-SSE2-LABEL: test_urem_one:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,2,0]
-; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,0,2863311531,2454267027]
-; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,0,2863311531,2454267027]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
; CHECK-SSE2-NEXT: psrld $1, %xmm3
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm3[3,3]
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm2
-; CHECK-SSE2-NEXT: psrld $2, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
+; CHECK-SSE2-NEXT: psrld $2, %xmm1
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
-; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[0,0]
-; CHECK-SSE2-NEXT: psrld $3, %xmm1
-; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm1[2,3]
-; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [6,1,12,14]
-; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm1[0,0]
+; CHECK-SSE2-NEXT: psrld $3, %xmm2
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[2,3]
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [6,1,12,14]
+; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm3
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
-; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm2[3,3]
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[3,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]