summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
author Chandler Carruth <chandlerc@gmail.com> 2011-12-20 11:19:37 +0000
committer Chandler Carruth <chandlerc@gmail.com> 2011-12-20 11:19:37 +0000
commit 24680c24d868d419a41e4f99171447f5aec12b98 (patch)
tree ff41b848e82d661054e599a8692485070e7406d5 /llvm/test
parent fe198ced31e9d055e8d27fa992423d59af4f1535 (diff)
downloadbcm5719-llvm-24680c24d868d419a41e4f99171447f5aec12b98.tar.gz
bcm5719-llvm-24680c24d868d419a41e4f99171447f5aec12b98.zip
Begin teaching the X86 target how to efficiently codegen patterns that use the zero-undefined variants of CTTZ and CTLZ. These are just simple patterns for now; there is more to be done to make real world code using these constructs be optimized and codegen'ed properly on X86.

The existing tests are spiffed up to check that we no longer generate unnecessary cmov instructions, and that we generate the very important 'xor' to transform bsr, which counts the index of the most significant one bit, to the number of leading (most significant) zero bits. Also, they now check that when the variant with defined zero result is used, the cmov is still produced.

llvm-svn: 146974
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/X86/clz.ll 47
1 files changed, 32 insertions, 15 deletions
diff --git a/llvm/test/CodeGen/X86/clz.ll b/llvm/test/CodeGen/X86/clz.ll
index 9b26efd10de..4e080309b6f 100644
--- a/llvm/test/CodeGen/X86/clz.ll
+++ b/llvm/test/CodeGen/X86/clz.ll
@@ -1,48 +1,65 @@
; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
define i32 @t1(i32 %x) nounwind {
- %tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
- ret i32 %tmp
+ %tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
+ ret i32 %tmp
; CHECK: t1:
; CHECK: bsrl
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: xorl $31,
+; CHECK: ret
}
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
define i32 @t2(i32 %x) nounwind {
- %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
- ret i32 %tmp
+ %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
+ ret i32 %tmp
; CHECK: t2:
; CHECK: bsfl
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
}
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
define i16 @t3(i16 %x, i16 %y) nounwind {
entry:
- %tmp1 = add i16 %x, %y
- %tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1, i1 true ) ; <i16> [#uses=1]
- ret i16 %tmp2
+ %tmp1 = add i16 %x, %y
+ %tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1, i1 true ) ; <i16> [#uses=1]
+ ret i16 %tmp2
; CHECK: t3:
; CHECK: bsrw
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: xorw $15,
+; CHECK: ret
}
declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
-; Don't generate the cmovne when the source is known non-zero (and bsr would
-; not set ZF).
-; rdar://9490949
-
define i32 @t4(i32 %n) nounwind {
entry:
+; Generate a cmov to handle zero inputs when necessary.
; CHECK: t4:
; CHECK: bsrl
+; CHECK: cmov
+; CHECK: xorl $31,
+; CHECK: ret
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+ ret i32 %tmp1
+}
+
+define i32 @t5(i32 %n) nounwind {
+entry:
+; Don't generate the cmovne when the source is known non-zero (and bsr would
+; not set ZF).
+; rdar://9490949
+; CHECK: t5:
+; CHECK: bsrl
; CHECK-NOT: cmov
+; CHECK: xorl $31,
; CHECK: ret
%or = or i32 %n, 1
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or, i1 true)
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or, i1 false)
ret i32 %tmp1
}
OpenPOWER on IntegriCloud