author | Craig Topper <craig.topper@intel.com> | 2018-09-26 17:01:44 +0000
---|---|---
committer | Craig Topper <craig.topper@intel.com> | 2018-09-26 17:01:44 +0000
commit | fb5d9f2849a3005e1ccd9554030c5f1a30bf3c0e |
tree | cf7f030a2ede8265d12d1d2ded4057f05dbcc157 /clang/test/CodeGen/bmi-builtins.c |
parent | 344475fce536e2f2f88d5e3b0a7bde51a2149341 |
[X86] For lzcnt/tzcnt intrinsics use cttz/ctlz intrinsics with zero_undef flag set to false.
Previously we emitted a select around the zero_undef=true intrinsic. At -O2 this pattern gets optimized into the zero_undef=false form, but at -O0 that optimization doesn't happen, so a compare and cmov end up wrapped around the tzcnt/lzcnt instruction.
By using the zero_undef=false intrinsic directly without the select, we can improve the -O0 codegen to just an lzcnt/tzcnt instruction.
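As a minimal sketch of that contrast (the function name, value names, and the OLD/NEW markers below are illustrative only, not code or FileCheck prefixes from this commit; assumes BMI is enabled, e.g. with -mbmi), the 32-bit tzcnt case changes roughly like this:

```c
#include <immintrin.h>

// Illustrative sketch only; IR value names are made up. OLD is the shape of
// the IR clang previously emitted for __tzcnt_u32 (zero check, zero_undef=true
// cttz, select of the 32 fallback); NEW is the single call emitted after this
// change.
unsigned int sketch_tzcnt_u32(unsigned int __X) {
  // OLD: %nonzero = icmp ne i32 %x, 0
  // OLD: %cttz    = call i32 @llvm.cttz.i32(i32 %x, i1 true)
  // OLD: %res     = select i1 %nonzero, i32 %cttz, i32 32
  // NEW: %res     = call i32 @llvm.cttz.i32(i32 %x, i1 false)
  return __tzcnt_u32(__X);
}
```

At -O2 the optimizer folds the OLD pattern into the NEW one anyway, so the difference only shows up in unoptimized builds, where the compare/cmov wrapper around the tzcnt disappears.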
Differential Revision: https://reviews.llvm.org/D52392
llvm-svn: 343126
Diffstat (limited to 'clang/test/CodeGen/bmi-builtins.c')
-rw-r--r-- | clang/test/CodeGen/bmi-builtins.c | 26
1 file changed, 8 insertions, 18 deletions
diff --git a/clang/test/CodeGen/bmi-builtins.c b/clang/test/CodeGen/bmi-builtins.c
index 91271f0b359..69eb2bdb807 100644
--- a/clang/test/CodeGen/bmi-builtins.c
+++ b/clang/test/CodeGen/bmi-builtins.c
@@ -15,9 +15,7 @@
 
 unsigned short test__tzcnt_u16(unsigned short __X) {
   // CHECK-LABEL: test__tzcnt_u16
-  // CHECK: zext i16 %{{.*}} to i32
-  // CHECK: icmp ne i32 %{{.*}}, 0
-  // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 true)
+  // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false)
   return __tzcnt_u16(__X);
 }
 
@@ -57,15 +55,13 @@ unsigned int test__blsr_u32(unsigned int __X) {
 
 unsigned int test__tzcnt_u32(unsigned int __X) {
   // CHECK-LABEL: test__tzcnt_u32
-  // CHECK: icmp ne i32 %{{.*}}, 0
-  // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true)
+  // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false)
   return __tzcnt_u32(__X);
 }
 
 int test_mm_tzcnt_32(unsigned int __X) {
   // CHECK-LABEL: test_mm_tzcnt_32
-  // CHECK: icmp ne i32 %{{.*}}, 0
-  // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true)
+  // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false)
   return _mm_tzcnt_32(__X);
 }
 
@@ -105,15 +101,13 @@ unsigned long long test__blsr_u64(unsigned long long __X) {
 
 unsigned long long test__tzcnt_u64(unsigned long long __X) {
   // CHECK-LABEL: test__tzcnt_u64
-  // CHECK: icmp ne i64 %{{.*}}, 0
-  // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true)
+  // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
   return __tzcnt_u64(__X);
 }
 
 long long test_mm_tzcnt_64(unsigned long long __X) {
   // CHECK-LABEL: test_mm_tzcnt_64
-  // CHECK: icmp ne i64 %{{.*}}, 0
-  // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true)
+  // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
   return _mm_tzcnt_64(__X);
 }
 
@@ -121,9 +115,7 @@ long long test_mm_tzcnt_64(unsigned long long __X) {
 
 unsigned short test_tzcnt_u16(unsigned short __X) {
   // CHECK-LABEL: test_tzcnt_u16
-  // CHECK: zext i16 %{{.*}} to i32
-  // CHECK: icmp ne i32 %{{.*}}, 0
-  // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 true)
+  // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false)
   return _tzcnt_u16(__X);
 }
 
@@ -168,8 +160,7 @@ unsigned int test_blsr_u32(unsigned int __X) {
 
 unsigned int test_tzcnt_u32(unsigned int __X) {
   // CHECK-LABEL: test_tzcnt_u32
-  // CHECK: icmp ne i32 %{{.*}}, 0
-  // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true)
+  // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false)
   return _tzcnt_u32(__X);
 }
 
@@ -215,7 +206,6 @@ unsigned long long test_blsr_u64(unsigned long long __X) {
 
 unsigned long long test_tzcnt_u64(unsigned long long __X) {
   // CHECK-LABEL: test_tzcnt_u64
-  // CHECK: icmp ne i64 %{{.*}}, 0
-  // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true)
+  // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
   return _tzcnt_u64(__X);
 }