diff options
| -rw-r--r-- | clang/lib/Headers/bmiintrin.h | 175 | ||||
| -rw-r--r-- | clang/lib/Headers/immintrin.h | 3 | ||||
| -rw-r--r-- | clang/test/CodeGen/bmi-builtins.c | 96 |
3 files changed, 144 insertions, 130 deletions
diff --git a/clang/lib/Headers/bmiintrin.h b/clang/lib/Headers/bmiintrin.h index b7af62f609a..841bd84070e 100644 --- a/clang/lib/Headers/bmiintrin.h +++ b/clang/lib/Headers/bmiintrin.h @@ -14,27 +14,13 @@ #ifndef __BMIINTRIN_H #define __BMIINTRIN_H -#define _tzcnt_u16(a) (__tzcnt_u16((a))) - -#define _andn_u32(a, b) (__andn_u32((a), (b))) - -/* _bextr_u32 != __bextr_u32 */ -#define _blsi_u32(a) (__blsi_u32((a))) - -#define _blsmsk_u32(a) (__blsmsk_u32((a))) - -#define _blsr_u32(a) (__blsr_u32((a))) - -#define _tzcnt_u32(a) (__tzcnt_u32((a))) - -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) - /* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT instruction behaves as BSF on non-BMI targets, there is code that expects to use it as a potentially faster version of BSF. */ #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#define _tzcnt_u16(a) (__tzcnt_u16((a))) + /// Counts the number of trailing zero bits in the operand. /// /// \headerfile <x86intrin.h> @@ -51,6 +37,94 @@ __tzcnt_u16(unsigned short __X) return __builtin_ia32_tzcnt_u16(__X); } +/// Counts the number of trailing zero bits in the operand. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the <c> TZCNT </c> instruction. +/// +/// \param __X +/// An unsigned 32-bit integer whose trailing zeros are to be counted. +/// \returns An unsigned 32-bit integer containing the number of trailing zero +/// bits in the operand. +static __inline__ unsigned int __RELAXED_FN_ATTRS +__tzcnt_u32(unsigned int __X) +{ + return __builtin_ia32_tzcnt_u32(__X); +} + +/// Counts the number of trailing zero bits in the operand. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the <c> TZCNT </c> instruction. +/// +/// \param __X +/// An unsigned 32-bit integer whose trailing zeros are to be counted. +/// \returns An 32-bit integer containing the number of trailing zero bits in +/// the operand. +static __inline__ int __RELAXED_FN_ATTRS +_mm_tzcnt_32(unsigned int __X) +{ + return __builtin_ia32_tzcnt_u32(__X); +} + +#define _tzcnt_u32(a) (__tzcnt_u32((a))) + +#ifdef __x86_64__ + +/// Counts the number of trailing zero bits in the operand. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the <c> TZCNT </c> instruction. +/// +/// \param __X +/// An unsigned 64-bit integer whose trailing zeros are to be counted. +/// \returns An unsigned 64-bit integer containing the number of trailing zero +/// bits in the operand. +static __inline__ unsigned long long __RELAXED_FN_ATTRS +__tzcnt_u64(unsigned long long __X) +{ + return __builtin_ia32_tzcnt_u64(__X); +} + +/// Counts the number of trailing zero bits in the operand. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the <c> TZCNT </c> instruction. +/// +/// \param __X +/// An unsigned 64-bit integer whose trailing zeros are to be counted. +/// \returns An 64-bit integer containing the number of trailing zero bits in +/// the operand. +static __inline__ long long __RELAXED_FN_ATTRS +_mm_tzcnt_64(unsigned long long __X) +{ + return __builtin_ia32_tzcnt_u64(__X); +} + +#define _tzcnt_u64(a) (__tzcnt_u64((a))) + +#endif /* __x86_64__ */ + +#undef __RELAXED_FN_ATTRS + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) + +#define _andn_u32(a, b) (__andn_u32((a), (b))) + +/* _bextr_u32 != __bextr_u32 */ +#define _blsi_u32(a) (__blsi_u32((a))) + +#define _blsmsk_u32(a) (__blsmsk_u32((a))) + +#define _blsr_u32(a) (__blsr_u32((a))) + /// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. /// @@ -169,38 +243,6 @@ __blsr_u32(unsigned int __X) return __X & (__X - 1); } -/// Counts the number of trailing zero bits in the operand. -/// -/// \headerfile <x86intrin.h> -/// -/// This intrinsic corresponds to the <c> TZCNT </c> instruction. -/// -/// \param __X -/// An unsigned 32-bit integer whose trailing zeros are to be counted. -/// \returns An unsigned 32-bit integer containing the number of trailing zero -/// bits in the operand. -static __inline__ unsigned int __RELAXED_FN_ATTRS -__tzcnt_u32(unsigned int __X) -{ - return __builtin_ia32_tzcnt_u32(__X); -} - -/// Counts the number of trailing zero bits in the operand. -/// -/// \headerfile <x86intrin.h> -/// -/// This intrinsic corresponds to the <c> TZCNT </c> instruction. -/// -/// \param __X -/// An unsigned 32-bit integer whose trailing zeros are to be counted. -/// \returns An 32-bit integer containing the number of trailing zero bits in -/// the operand. -static __inline__ int __RELAXED_FN_ATTRS -_mm_tzcnt_32(unsigned int __X) -{ - return __builtin_ia32_tzcnt_u32(__X); -} - #ifdef __x86_64__ #define _andn_u64(a, b) (__andn_u64((a), (b))) @@ -212,8 +254,6 @@ _mm_tzcnt_32(unsigned int __X) #define _blsr_u64(a) (__blsr_u64((a))) -#define _tzcnt_u64(a) (__tzcnt_u64((a))) - /// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. /// @@ -332,41 +372,10 @@ __blsr_u64(unsigned long long __X) return __X & (__X - 1); } -/// Counts the number of trailing zero bits in the operand. -/// -/// \headerfile <x86intrin.h> -/// -/// This intrinsic corresponds to the <c> TZCNT </c> instruction. -/// -/// \param __X -/// An unsigned 64-bit integer whose trailing zeros are to be counted. -/// \returns An unsigned 64-bit integer containing the number of trailing zero -/// bits in the operand. -static __inline__ unsigned long long __RELAXED_FN_ATTRS -__tzcnt_u64(unsigned long long __X) -{ - return __builtin_ia32_tzcnt_u64(__X); -} - -/// Counts the number of trailing zero bits in the operand. -/// -/// \headerfile <x86intrin.h> -/// -/// This intrinsic corresponds to the <c> TZCNT </c> instruction. -/// -/// \param __X -/// An unsigned 64-bit integer whose trailing zeros are to be counted. -/// \returns An 64-bit integer containing the number of trailing zero bits in -/// the operand. -static __inline__ long long __RELAXED_FN_ATTRS -_mm_tzcnt_64(unsigned long long __X) -{ - return __builtin_ia32_tzcnt_u64(__X); -} - #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS -#undef __RELAXED_FN_ATTRS + +#endif /* !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) */ #endif /* __BMIINTRIN_H */ diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 7555ad82fac..ae900ee85b7 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -64,9 +64,8 @@ #include <vpclmulqdqintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) +/* No feature check desired due to internal checks */ #include <bmiintrin.h> -#endif #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__) #include <bmi2intrin.h> diff --git a/clang/test/CodeGen/bmi-builtins.c b/clang/test/CodeGen/bmi-builtins.c index 9eda3f614d4..9f2d776299f 100644 --- a/clang/test/CodeGen/bmi-builtins.c +++ b/clang/test/CodeGen/bmi-builtins.c @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK_TZCNT +// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefix=CHECK-TZCNT #include <immintrin.h> @@ -13,12 +14,57 @@ // instruction is identical in hardware, the AMD and Intel // intrinsics are different! +unsigned short test_tzcnt_u16(unsigned short __X) { + // CHECK-TZCNT-LABEL: test_tzcnt_u16 + // CHECK-TZCNT: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false) + return _tzcnt_u16(__X); +} + unsigned short test__tzcnt_u16(unsigned short __X) { - // CHECK-LABEL: test__tzcnt_u16 - // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false) + // CHECK-TZCNT-LABEL: test__tzcnt_u16 + // CHECK-TZCNT: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false) return __tzcnt_u16(__X); } +unsigned int test__tzcnt_u32(unsigned int __X) { + // CHECK-TZCNT-LABEL: test__tzcnt_u32 + // CHECK-TZCNT: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false) + return __tzcnt_u32(__X); +} + +int test_mm_tzcnt_32(unsigned int __X) { + // CHECK-TZCNT-LABEL: test_mm_tzcnt_32 + // CHECK-TZCNT: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false) + return _mm_tzcnt_32(__X); +} + +unsigned int test_tzcnt_u32(unsigned int __X) { + // CHECK-TZCNT-LABEL: test_tzcnt_u32 + // CHECK-TZCNT: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false) + return _tzcnt_u32(__X); +} + +#ifdef __x86_64__ +unsigned long long test__tzcnt_u64(unsigned long long __X) { + // CHECK-TZCNT-LABEL: test__tzcnt_u64 + // CHECK-TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) + return __tzcnt_u64(__X); +} + +long long test_mm_tzcnt_64(unsigned long long __X) { + // CHECK-TZCNT-LABEL: test_mm_tzcnt_64 + // CHECK-TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) + return _mm_tzcnt_64(__X); +} + +unsigned long long test_tzcnt_u64(unsigned long long __X) { + // CHECK-TZCNT-LABEL: test_tzcnt_u64 + // CHECK-TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) + return _tzcnt_u64(__X); +} +#endif + +#if !defined(TEST_TZCNT) unsigned int test__andn_u32(unsigned int __X, unsigned int __Y) { // CHECK-LABEL: test__andn_u32 // CHECK: xor i32 %{{.*}}, -1 @@ -53,18 +99,6 @@ unsigned int test__blsr_u32(unsigned int __X) { return __blsr_u32(__X); } -unsigned int test__tzcnt_u32(unsigned int __X) { - // CHECK-LABEL: test__tzcnt_u32 - // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false) - return __tzcnt_u32(__X); -} - -int test_mm_tzcnt_32(unsigned int __X) { - // CHECK-LABEL: test_mm_tzcnt_32 - // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false) - return _mm_tzcnt_32(__X); -} - #ifdef __x86_64__ unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) { // CHECK-LABEL: test__andn_u64 @@ -99,28 +133,10 @@ unsigned long long test__blsr_u64(unsigned long long __X) { // CHECK: and i64 %{{.*}}, %{{.*}} return __blsr_u64(__X); } - -unsigned long long test__tzcnt_u64(unsigned long long __X) { - // CHECK-LABEL: test__tzcnt_u64 - // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) - return __tzcnt_u64(__X); -} - -long long test_mm_tzcnt_64(unsigned long long __X) { - // CHECK-LABEL: test_mm_tzcnt_64 - // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) - return _mm_tzcnt_64(__X); -} #endif // Intel intrinsics -unsigned short test_tzcnt_u16(unsigned short __X) { - // CHECK-LABEL: test_tzcnt_u16 - // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false) - return _tzcnt_u16(__X); -} - unsigned int test_andn_u32(unsigned int __X, unsigned int __Y) { // CHECK-LABEL: test_andn_u32 // CHECK: xor i32 %{{.*}}, -1 @@ -160,12 +176,6 @@ unsigned int test_blsr_u32(unsigned int __X) { return _blsr_u32(__X); } -unsigned int test_tzcnt_u32(unsigned int __X) { - // CHECK-LABEL: test_tzcnt_u32 - // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false) - return _tzcnt_u32(__X); -} - #ifdef __x86_64__ unsigned long long test_andn_u64(unsigned long __X, unsigned long __Y) { // CHECK-LABEL: test_andn_u64 @@ -206,10 +216,6 @@ unsigned long long test_blsr_u64(unsigned long long __X) { // CHECK: and i64 %{{.*}}, %{{.*}} return _blsr_u64(__X); } - -unsigned long long test_tzcnt_u64(unsigned long long __X) { - // CHECK-LABEL: test_tzcnt_u64 - // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) - return _tzcnt_u64(__X); -} #endif + +#endif // !defined(TEST_TZCNT) |

