summary | refs | log | tree | commit | diff | stats
path: root/clang/test/CodeGen/bmi-builtins.c
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com>, 2018-09-26 17:01:44 +0000
committer: Craig Topper <craig.topper@intel.com>, 2018-09-26 17:01:44 +0000
commit: fb5d9f2849a3005e1ccd9554030c5f1a30bf3c0e (patch)
tree: cf7f030a2ede8265d12d1d2ded4057f05dbcc157 /clang/test/CodeGen/bmi-builtins.c
parent: 344475fce536e2f2f88d5e3b0a7bde51a2149341 (diff)
download: bcm5719-llvm-fb5d9f2849a3005e1ccd9554030c5f1a30bf3c0e.tar.gz
download: bcm5719-llvm-fb5d9f2849a3005e1ccd9554030c5f1a30bf3c0e.zip
[X86] For lzcnt/tzcnt intrinsics use cttz/ctlz intrinsics with zero_undef flag set to false.
Previously we used a select and the zero_undef=true intrinsic. At -O2 this pattern gets optimized to zero_undef=false, but at -O0 that optimization does not happen, which results in a compare and cmov being wrapped around a tzcnt/lzcnt instruction. By using the zero_undef=false intrinsic directly, without the select, we can improve the -O0 codegen to just an lzcnt/tzcnt instruction.

Differential Revision: https://reviews.llvm.org/D52392
llvm-svn: 343126
Diffstat (limited to 'clang/test/CodeGen/bmi-builtins.c')
-rw-r--r-- clang/test/CodeGen/bmi-builtins.c 26
1 file changed, 8 insertions, 18 deletions
diff --git a/clang/test/CodeGen/bmi-builtins.c b/clang/test/CodeGen/bmi-builtins.c
index 91271f0b359..69eb2bdb807 100644
--- a/clang/test/CodeGen/bmi-builtins.c
+++ b/clang/test/CodeGen/bmi-builtins.c
@@ -15,9 +15,7 @@
unsigned short test__tzcnt_u16(unsigned short __X) {
// CHECK-LABEL: test__tzcnt_u16
- // CHECK: zext i16 %{{.*}} to i32
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 true)
+ // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false)
return __tzcnt_u16(__X);
}
@@ -57,15 +55,13 @@ unsigned int test__blsr_u32(unsigned int __X) {
unsigned int test__tzcnt_u32(unsigned int __X) {
// CHECK-LABEL: test__tzcnt_u32
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true)
+ // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false)
return __tzcnt_u32(__X);
}
int test_mm_tzcnt_32(unsigned int __X) {
// CHECK-LABEL: test_mm_tzcnt_32
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true)
+ // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false)
return _mm_tzcnt_32(__X);
}
@@ -105,15 +101,13 @@ unsigned long long test__blsr_u64(unsigned long long __X) {
unsigned long long test__tzcnt_u64(unsigned long long __X) {
// CHECK-LABEL: test__tzcnt_u64
- // CHECK: icmp ne i64 %{{.*}}, 0
- // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true)
+ // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
return __tzcnt_u64(__X);
}
long long test_mm_tzcnt_64(unsigned long long __X) {
// CHECK-LABEL: test_mm_tzcnt_64
- // CHECK: icmp ne i64 %{{.*}}, 0
- // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true)
+ // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
return _mm_tzcnt_64(__X);
}
@@ -121,9 +115,7 @@ long long test_mm_tzcnt_64(unsigned long long __X) {
unsigned short test_tzcnt_u16(unsigned short __X) {
// CHECK-LABEL: test_tzcnt_u16
- // CHECK: zext i16 %{{.*}} to i32
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 true)
+ // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false)
return _tzcnt_u16(__X);
}
@@ -168,8 +160,7 @@ unsigned int test_blsr_u32(unsigned int __X) {
unsigned int test_tzcnt_u32(unsigned int __X) {
// CHECK-LABEL: test_tzcnt_u32
- // CHECK: icmp ne i32 %{{.*}}, 0
- // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true)
+ // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false)
return _tzcnt_u32(__X);
}
@@ -215,7 +206,6 @@ unsigned long long test_blsr_u64(unsigned long long __X) {
unsigned long long test_tzcnt_u64(unsigned long long __X) {
// CHECK-LABEL: test_tzcnt_u64
- // CHECK: icmp ne i64 %{{.*}}, 0
- // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true)
+ // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
return _tzcnt_u64(__X);
}
OpenPOWER on IntegriCloud