diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-10-25 20:56:42 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-10-25 20:56:42 +0000 |
| commit | 4ebb04510a9c656fa83e91aec43380f6edec1f56 (patch) | |
| tree | 55c1ec0de884c3613bbda8923dfc892a80925ae9 | |
| parent | 8f29ea36c36291d66c2630e23e435bd75c3e491c (diff) | |
| download | bcm5719-llvm-4ebb04510a9c656fa83e91aec43380f6edec1f56.tar.gz bcm5719-llvm-4ebb04510a9c656fa83e91aec43380f6edec1f56.zip | |
[DAGCombiner] Enable sdiv(x,y) -> udiv(x,y) combine for vectors
SelectionDAG::SignBitIsZero (via SelectionDAG::computeKnownBits) has supported vectors since rL280927, so the scalar-only `!VT.isVector()` guard around this combine is no longer needed.
llvm-svn: 285118
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/combine-sdiv.ll | 48 |
2 files changed, 14 insertions, 40 deletions
```diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8f77ccd3c2f..daa0ea7afbd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2317,10 +2317,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
 
   // If we know the sign bits of both operands are zero, strength reduce to a
   // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
-  if (!VT.isVector()) {
-    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
-      return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
-  }
+  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
 
   // fold (sdiv X, pow2) -> simple ops after legalize
   // FIXME: We check for the exact bit here because the generic lowering gives
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index 892cf153645..2968addd316 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -82,52 +82,28 @@ define <4 x i32> @combine_vec_sdiv_by_pos1(<4 x i32> %x) {
 ; SSE: # BB#0:
 ; SSE-NEXT: pand {{.*}}(%rip), %xmm0
 ; SSE-NEXT: pextrd $1, %xmm0, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: sarl $31, %ecx
-; SSE-NEXT: shrl $30, %ecx
-; SSE-NEXT: addl %eax, %ecx
-; SSE-NEXT: sarl $2, %ecx
-; SSE-NEXT: pextrd $2, %xmm0, %eax
+; SSE-NEXT: shrl $2, %eax
+; SSE-NEXT: pextrd $2, %xmm0, %ecx
 ; SSE-NEXT: pextrd $3, %xmm0, %edx
-; SSE-NEXT: pinsrd $1, %ecx, %xmm0
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: sarl $31, %ecx
-; SSE-NEXT: shrl $29, %ecx
-; SSE-NEXT: addl %eax, %ecx
-; SSE-NEXT: sarl $3, %ecx
+; SSE-NEXT: pinsrd $1, %eax, %xmm0
+; SSE-NEXT: shrl $3, %ecx
 ; SSE-NEXT: pinsrd $2, %ecx, %xmm0
-; SSE-NEXT: movl %edx, %eax
-; SSE-NEXT: sarl $31, %eax
-; SSE-NEXT: shrl $28, %eax
-; SSE-NEXT: addl %edx, %eax
-; SSE-NEXT: sarl $4, %eax
-; SSE-NEXT: pinsrd $3, %eax, %xmm0
+; SSE-NEXT: shrl $4, %edx
+; SSE-NEXT: pinsrd $3, %edx, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_vec_sdiv_by_pos1:
 ; AVX: # BB#0:
 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: vpextrd $1, %xmm0, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: sarl $31, %ecx
-; AVX-NEXT: shrl $30, %ecx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm1
+; AVX-NEXT: shrl $2, %eax
+; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm1
 ; AVX-NEXT: vpextrd $2, %xmm0, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: sarl $31, %ecx
-; AVX-NEXT: shrl $29, %ecx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: sarl $3, %ecx
-; AVX-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
+; AVX-NEXT: shrl $3, %eax
+; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
 ; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: sarl $31, %ecx
-; AVX-NEXT: shrl $28, %ecx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: sarl $4, %ecx
-; AVX-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm0
+; AVX-NEXT: shrl $4, %eax
+; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
 ; AVX-NEXT: retq
   %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
   %2 = sdiv <4 x i32> %1, <i32 1, i32 4, i32 8, i32 16>
```

