| author | Sanjay Patel <spatel@rotateright.com> | 2017-03-06 16:36:42 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2017-03-06 16:36:42 +0000 |
| commit | 7f7947bf413151d26571719678bcbfab25cc2ab3 (patch) | |
| tree | 496761091830112dd003d8daf9cd5882661c3e78 /llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll | |
| parent | d1eff2f022929a2d8d7ab9f9db4c922381bd03c5 (diff) | |
| download | bcm5719-llvm-7f7947bf413151d26571719678bcbfab25cc2ab3.tar.gz bcm5719-llvm-7f7947bf413151d26571719678bcbfab25cc2ab3.zip | |
[DAGCombiner] simplify div/rem-by-0
Refactoring of duplicated code and more fixes to follow.
This is motivated by the post-commit comments for r296699:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20170306/435182.html
I.e., we can crash if we're missing obvious simplifications like this that
already exist in the IR simplifier, or if these patterns occur later than expected.
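
For reference, this is the pattern in question, reproduced from the `sdiv_zero` test that this commit deletes from the diff below. The divisor is zero in every lane, so the division is undefined and the combine can fold it away (presumably to undef, matching the IR-level simplifier) instead of lowering four `idivl` instructions:

```llvm
; Reproduced from the deleted test: sdiv by an all-zero divisor is
; undefined, so DAGCombiner can now fold the whole division away
; rather than scalarizing it into four idivl instructions.
define <4 x i32> @sdiv_zero(<4 x i32> %var) {
entry:
  %0 = sdiv <4 x i32> %var, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %0
}
```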
The x86 change for non-splat division shows a potential opportunity to improve
vector codegen: we assumed that since only one lane had meaningful results, we
should do the math in scalar. But that means moving back and forth from vector
registers.
llvm-svn: 297026
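
To make the new x86 output in the diff easier to read: the one meaningful lane divides by 2, and the backend strength-reduces that to the usual shift/add/shift recipe. A minimal scalar sketch of the identity (an illustration, not code from the patch):

```llvm
; Round-toward-zero signed division by 2:
;   x / 2  ==  (x + (x >>u 31)) >>s 1
; The lshr extracts the sign bit (1 for negative x), the add biases
; negative values upward, and the arithmetic shift then truncates
; toward zero. This is the shrl $31 / addl / sarl sequence in the
; new CHECK lines below.
define i32 @sdiv_by_2(i32 %x) {
  %sign = lshr i32 %x, 31
  %biased = add i32 %x, %sign
  %div = ashr i32 %biased, 1
  ret i32 %div
}
```

With the dead lanes folded away, only this scalar sequence plus a `movd` in and out of `%xmm0` remains, and that GPR round trip is what the commit message questions.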
Diffstat (limited to 'llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll | 103 |
1 file changed, 14 insertions(+), 89 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll b/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll
index f7151af528b..01a91936e56 100644
--- a/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll
+++ b/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll
@@ -49,56 +49,6 @@ entry:
   ret <8 x i16> %0
 }
 
-define <4 x i32> @sdiv_zero(<4 x i32> %var) {
-; SSE-LABEL: sdiv_zero:
-; SSE:       # BB#0: # %entry
-; SSE-NEXT:    pextrd $1, %xmm0, %eax
-; SSE-NEXT:    xorl %esi, %esi
-; SSE-NEXT:    cltd
-; SSE-NEXT:    idivl %esi
-; SSE-NEXT:    movl %eax, %ecx
-; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    cltd
-; SSE-NEXT:    idivl %esi
-; SSE-NEXT:    movd %eax, %xmm1
-; SSE-NEXT:    pinsrd $1, %ecx, %xmm1
-; SSE-NEXT:    pextrd $2, %xmm0, %eax
-; SSE-NEXT:    cltd
-; SSE-NEXT:    idivl %esi
-; SSE-NEXT:    pinsrd $2, %eax, %xmm1
-; SSE-NEXT:    pextrd $3, %xmm0, %eax
-; SSE-NEXT:    cltd
-; SSE-NEXT:    idivl %esi
-; SSE-NEXT:    pinsrd $3, %eax, %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm0
-; SSE-NEXT:    retq
-;
-; AVX-LABEL: sdiv_zero:
-; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vpextrd $1, %xmm0, %eax
-; AVX-NEXT:    xorl %esi, %esi
-; AVX-NEXT:    cltd
-; AVX-NEXT:    idivl %esi
-; AVX-NEXT:    movl %eax, %ecx
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    cltd
-; AVX-NEXT:    idivl %esi
-; AVX-NEXT:    vmovd %eax, %xmm1
-; AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT:    vpextrd $2, %xmm0, %eax
-; AVX-NEXT:    cltd
-; AVX-NEXT:    idivl %esi
-; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
-; AVX-NEXT:    vpextrd $3, %xmm0, %eax
-; AVX-NEXT:    cltd
-; AVX-NEXT:    idivl %esi
-; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
-; AVX-NEXT:    retq
-entry:
-  %0 = sdiv <4 x i32> %var, <i32 0, i32 0, i32 0, i32 0>
-  ret <4 x i32> %0
-}
-
 define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
 ; SSE-LABEL: sdiv_vec4x32:
 ; SSE:       # BB#0: # %entry
@@ -234,52 +184,27 @@ entry:
   ret <16 x i16> %a0
 }
 
+; TODO: The div-by-0 lanes are folded away, so we use scalar ops. Would it be better to keep this in the vector unit?
+
 define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
 ; SSE-LABEL: sdiv_non_splat:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pextrd $1, %xmm0, %eax
-; SSE-NEXT:    xorl %ecx, %ecx
-; SSE-NEXT:    cltd
-; SSE-NEXT:    idivl %ecx
-; SSE-NEXT:    movd %xmm0, %edx
-; SSE-NEXT:    movl %edx, %esi
-; SSE-NEXT:    shrl $31, %esi
-; SSE-NEXT:    addl %edx, %esi
-; SSE-NEXT:    sarl %esi
-; SSE-NEXT:    movd %esi, %xmm1
-; SSE-NEXT:    pinsrd $1, %eax, %xmm1
-; SSE-NEXT:    pextrd $2, %xmm0, %eax
-; SSE-NEXT:    cltd
-; SSE-NEXT:    idivl %ecx
-; SSE-NEXT:    pinsrd $2, %eax, %xmm1
-; SSE-NEXT:    pextrd $3, %xmm0, %eax
-; SSE-NEXT:    cltd
-; SSE-NEXT:    idivl %ecx
-; SSE-NEXT:    pinsrd $3, %eax, %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    movl %eax, %ecx
+; SSE-NEXT:    shrl $31, %ecx
+; SSE-NEXT:    addl %eax, %ecx
+; SSE-NEXT:    sarl %ecx
+; SSE-NEXT:    movd %ecx, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: sdiv_non_splat:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpextrd $1, %xmm0, %eax
-; AVX-NEXT:    xorl %ecx, %ecx
-; AVX-NEXT:    cltd
-; AVX-NEXT:    idivl %ecx
-; AVX-NEXT:    vmovd %xmm0, %edx
-; AVX-NEXT:    movl %edx, %esi
-; AVX-NEXT:    shrl $31, %esi
-; AVX-NEXT:    addl %edx, %esi
-; AVX-NEXT:    sarl %esi
-; AVX-NEXT:    vmovd %esi, %xmm1
-; AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
-; AVX-NEXT:    vpextrd $2, %xmm0, %eax
-; AVX-NEXT:    cltd
-; AVX-NEXT:    idivl %ecx
-; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
-; AVX-NEXT:    vpextrd $3, %xmm0, %eax
-; AVX-NEXT:    cltd
-; AVX-NEXT:    idivl %ecx
-; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    movl %eax, %ecx
+; AVX-NEXT:    shrl $31, %ecx
+; AVX-NEXT:    addl %eax, %ecx
+; AVX-NEXT:    sarl %ecx
+; AVX-NEXT:    vmovd %ecx, %xmm0
 ; AVX-NEXT:    retq
   %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
   ret <4 x i32> %y
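
The TODO in the hunk above asks whether the single live lane justifies leaving the vector unit. A hypothetical vector-unit alternative (a sketch, not anything this patch emits) would apply the same shift/add/shift recipe with vector shifts, since the other three lanes divide by zero and are therefore free to hold anything:

```llvm
; Hypothetical: keep the whole computation in SIMD registers.
; Lane 0 computes x/2 via the shift/add/shift identity; lanes 1-3
; would otherwise be undef (div-by-0), so whatever these vector ops
; leave in them is acceptable.
define <4 x i32> @sdiv_non_splat_vec(<4 x i32> %x) {
  %sign = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %biased = add <4 x i32> %x, %sign
  %div = ashr <4 x i32> %biased, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %div
}
```

On SSE this should lower to roughly three vector instructions (psrld, paddd, psrad) with no round trip through general-purpose registers.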

