Switch the lowering of CTLZ_ZERO_UNDEF from a .td pattern back to the
X86ISelLowering C++ code. Because this is lowered via an xor wrapped
around a bsr, we want the dagcombine which runs after isel lowering to
have a chance to clean things up. In particular, it is very common to
see code which looks like:

  (sizeof(x)*8 - 1) ^ __builtin_clz(x)

Which is trying to compute the most significant bit of 'x'. That's
actually the value computed directly by the 'bsr' instruction, but if we
match it too late, we'll get completely redundant xor instructions.

The more naive code for the above (subtracting rather than using an xor)
still isn't handled correctly due to the dagcombine getting confused.

Also, while here fix an issue spotted by inspection: we should have been
expanding the zero-undef variants to the normal variants when there is
an 'lzcnt' instruction. Do so, and test for this. We don't want to
generate unnecessary 'bsr' instructions.

These two changes fix some regressions in encoding and decoding
benchmarks. However, there is still a *lot* to be improved on in this
type of code.

llvm-svn: 147244
author: Chandler Carruth <chandlerc@gmail.com> 2011-12-24 10:55:54 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2011-12-24 10:55:54 +0000
commit: 7e9453e916a52ddaff22117602721295b68ae0ce (patch)
tree: 958c79687a162c06feba3e33521fd7b4da9bbf48 /llvm/test/CodeGen/X86/clz.ll
parent: 15075d4b19f55d7dca8597487f9f6177fec329da (diff)
download: bcm5719-llvm-7e9453e916a52ddaff22117602721295b68ae0ce.tar.gz
bcm5719-llvm-7e9453e916a52ddaff22117602721295b68ae0ce.zip
1 files changed, 26 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/X86/clz.ll b/llvm/test/CodeGen/X86/clz.ll
index 4e080309b6f..ad47bde8549 100644
--- a/llvm/test/CodeGen/X86/clz.ll
+++ b/llvm/test/CodeGen/X86/clz.ll
@@ -31,7 +31,7 @@ entry:
 ; CHECK: t3:
 ; CHECK: bsrw
 ; CHECK-NOT: cmov
-; CHECK: xorw $15,
+; CHECK: xorl $15,
 ; CHECK: ret
 }
 
@@ -63,3 +63,28 @@ entry:
   %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or, i1 false)
   ret i32 %tmp1
 }
+
+define i32 @t6(i32 %n) nounwind {
+entry:
+; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
+; the most significant bit, which is what 'bsr' does natively.
+; CHECK: t6:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %n, i1 true)
+  %bsr = xor i32 %ctlz, 31
+  ret i32 %bsr
+}
+
+define i32 @t7(i32 %n) nounwind {
+entry:
+; Same as t6, but ensure this happens even when there is a potential zero.
+; CHECK: t7:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+  %bsr = xor i32 %ctlz, 31
+  ret i32 %bsr
+}
author	Chandler Carruth <chandlerc@gmail.com>	2011-12-24 10:55:54 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	2011-12-24 10:55:54 +0000
commit	7e9453e916a52ddaff22117602721295b68ae0ce (patch)
tree	958c79687a162c06feba3e33521fd7b4da9bbf48 /llvm/test/CodeGen/X86/clz.ll
parent	15075d4b19f55d7dca8597487f9f6177fec329da (diff)
download	bcm5719-llvm-7e9453e916a52ddaff22117602721295b68ae0ce.tar.gz bcm5719-llvm-7e9453e916a52ddaff22117602721295b68ae0ce.zip