author     Sanjay Patel <spatel@rotateright.com>  2017-03-06 16:36:42 +0000
committer  Sanjay Patel <spatel@rotateright.com>  2017-03-06 16:36:42 +0000
commit     7f7947bf413151d26571719678bcbfab25cc2ab3 (patch)
tree       496761091830112dd003d8daf9cd5882661c3e78 /llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll
parent     d1eff2f022929a2d8d7ab9f9db4c922381bd03c5 (diff)
[DAGCombiner] simplify div/rem-by-0
Refactoring of duplicated code, with more fixes to follow. This is motivated by the post-commit comments for r296699:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20170306/435182.html

That is, we can crash if we're missing obvious simplifications like this that exist in the IR simplifier, or if these occur later than expected.

The x86 change for non-splat division shows a potential opportunity to improve vector codegen: we assumed that since only one lane had meaningful results, we should do the math in scalar. But that means moving back and forth between vector and scalar registers.

llvm-svn: 297026
Diffstat (limited to 'llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll')
-rw-r--r--  llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll | 103
1 file changed, 14 insertions(+), 89 deletions(-)
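The pattern being folded is a signed division whose divisor lanes are all zero, as in the sdiv_zero test removed below. A minimal IR sketch of that input (the function name is illustrative; zeroinitializer is shorthand for the all-zero constant vector spelled out in the test):

define <4 x i32> @sdiv_by_zero(<4 x i32> %v) {
  ; Division by zero is undefined, so the combiner can fold this node
  ; away instead of scalarizing it into four idivl instructions that
  ; would fault at run time.
  %d = sdiv <4 x i32> %v, zeroinitializer
  ret <4 x i32> %d
}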
diff --git a/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll b/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll
index f7151af528b..01a91936e56 100644
--- a/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll
+++ b/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll
@@ -49,56 +49,6 @@ entry:
ret <8 x i16> %0
}
-define <4 x i32> @sdiv_zero(<4 x i32> %var) {
-; SSE-LABEL: sdiv_zero:
-; SSE: # BB#0: # %entry
-; SSE-NEXT: pextrd $1, %xmm0, %eax
-; SSE-NEXT: xorl %esi, %esi
-; SSE-NEXT: cltd
-; SSE-NEXT: idivl %esi
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: cltd
-; SSE-NEXT: idivl %esi
-; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: pinsrd $1, %ecx, %xmm1
-; SSE-NEXT: pextrd $2, %xmm0, %eax
-; SSE-NEXT: cltd
-; SSE-NEXT: idivl %esi
-; SSE-NEXT: pinsrd $2, %eax, %xmm1
-; SSE-NEXT: pextrd $3, %xmm0, %eax
-; SSE-NEXT: cltd
-; SSE-NEXT: idivl %esi
-; SSE-NEXT: pinsrd $3, %eax, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: retq
-;
-; AVX-LABEL: sdiv_zero:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
-; AVX-NEXT: xorl %esi, %esi
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %esi
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %esi
-; AVX-NEXT: vmovd %eax, %xmm1
-; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $2, %xmm0, %eax
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %esi
-; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %esi
-; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
-; AVX-NEXT: retq
-entry:
- %0 = sdiv <4 x i32> %var, <i32 0, i32 0, i32 0, i32 0>
- ret <4 x i32> %0
-}
-
define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
; SSE-LABEL: sdiv_vec4x32:
; SSE: # BB#0: # %entry
@@ -234,52 +184,27 @@ entry:
ret <16 x i16> %a0
}
+; TODO: The div-by-0 lanes are folded away, so we use scalar ops. Would it be better to keep this in the vector unit?
+
define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
; SSE-LABEL: sdiv_non_splat:
; SSE: # BB#0:
-; SSE-NEXT: pextrd $1, %xmm0, %eax
-; SSE-NEXT: xorl %ecx, %ecx
-; SSE-NEXT: cltd
-; SSE-NEXT: idivl %ecx
-; SSE-NEXT: movd %xmm0, %edx
-; SSE-NEXT: movl %edx, %esi
-; SSE-NEXT: shrl $31, %esi
-; SSE-NEXT: addl %edx, %esi
-; SSE-NEXT: sarl %esi
-; SSE-NEXT: movd %esi, %xmm1
-; SSE-NEXT: pinsrd $1, %eax, %xmm1
-; SSE-NEXT: pextrd $2, %xmm0, %eax
-; SSE-NEXT: cltd
-; SSE-NEXT: idivl %ecx
-; SSE-NEXT: pinsrd $2, %eax, %xmm1
-; SSE-NEXT: pextrd $3, %xmm0, %eax
-; SSE-NEXT: cltd
-; SSE-NEXT: idivl %ecx
-; SSE-NEXT: pinsrd $3, %eax, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: shrl $31, %ecx
+; SSE-NEXT: addl %eax, %ecx
+; SSE-NEXT: sarl %ecx
+; SSE-NEXT: movd %ecx, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_non_splat:
; AVX: # BB#0:
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
-; AVX-NEXT: xorl %ecx, %ecx
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: vmovd %xmm0, %edx
-; AVX-NEXT: movl %edx, %esi
-; AVX-NEXT: shrl $31, %esi
-; AVX-NEXT: addl %edx, %esi
-; AVX-NEXT: sarl %esi
-; AVX-NEXT: vmovd %esi, %xmm1
-; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $2, %xmm0, %eax
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: shrl $31, %ecx
+; AVX-NEXT: addl %eax, %ecx
+; AVX-NEXT: sarl %ecx
+; AVX-NEXT: vmovd %ecx, %xmm0
; AVX-NEXT: retq
%y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
ret <4 x i32> %y
}
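The surviving lane 0 above is lowered with the standard shift sequence for signed division by 2: shrl $31 extracts the sign bit, addl biases negative inputs by one, and sarl then rounds toward zero as sdiv requires. A minimal scalar IR equivalent of that sequence (function name is illustrative):

define i32 @sdiv_by_2(i32 %x) {
  %sign = lshr i32 %x, 31    ; 1 for negative x, 0 otherwise
  %bias = add i32 %x, %sign  ; bias negatives so the shift rounds toward zero
  %div  = ashr i32 %bias, 1  ; arithmetic shift right == signed divide by 2
  ret i32 %div
}

For example, %x = -7 gives %sign = 1, %bias = -6, and %div = -3, matching sdiv's truncating semantics, whereas a bare ashr would produce -4.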