diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-05-14 17:50:40 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-05-14 17:50:40 +0000 |
| commit | 25de41cfbcb6e374a244ac4c9f76c74e669459fd (patch) | |
| tree | 58ea32aba0123d940e60cdde5f3b37db242c6332 /clang/lib | |
| parent | 64a2ea41eaca32a6ba66db6b699ca33b2eee095b (diff) | |
| download | bcm5719-llvm-25de41cfbcb6e374a244ac4c9f76c74e669459fd.tar.gz bcm5719-llvm-25de41cfbcb6e374a244ac4c9f76c74e669459fd.zip | |
[X86] Use __builtin_convertvector to replace some of the avx512 truncate builtins.
As long as the destination type is a 256- or 128-bit vector with the same number of elements as the source, we can use __builtin_convertvector to directly generate a trunc IR instruction, which will be handled natively by the backend.
Differential Revision: https://reviews.llvm.org/D46742
llvm-svn: 332266
Diffstat (limited to 'clang/lib')
| -rw-r--r-- | clang/lib/Headers/avx512bwintrin.h | 16 | ||||
| -rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 60 | ||||
| -rw-r--r-- | clang/lib/Headers/avx512vlbwintrin.h | 16 | ||||
| -rw-r--r-- | clang/lib/Headers/avx512vlintrin.h | 30 |
4 files changed, 56 insertions, 66 deletions
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index f5ff5d3c10a..499bb8a6906 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1157,23 +1157,21 @@ _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_cvtepi16_epi8 (__m512i __A) {
-  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
-              (__v32qi) _mm256_setzero_si256(),
-              (__mmask32) -1);
+  return (__m256i)__builtin_convertvector((__v32hi)__A, __v32qi);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
-  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
-              (__v32qi) __O,
-              __M);
+  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
+                                             (__v32qi)_mm512_cvtepi16_epi8(__A),
+                                             (__v32qi)__O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
-  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
-              (__v32qi) _mm256_setzero_si256(),
-              __M);
+  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
+                                             (__v32qi)_mm512_cvtepi16_epi8(__A),
+                                             (__v32qi)_mm256_setzero_si256());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e891b6c2d1c..24a4e9fef24 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -7601,24 +7601,23 @@ _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_cvtepi32_epi8 (__m512i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
-              (__v16qi) _mm_undefined_si128 (),
-              (__mmask16) -1);
+  return (__m128i)__builtin_convertvector((__v16si)__A, __v16qi);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
-              (__v16qi) __O, __M);
+  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
+                                             (__v16qi)_mm512_cvtepi32_epi8(__A),
+                                             (__v16qi)__O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
-              (__v16qi) _mm_setzero_si128 (),
-              __M);
+  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
+                                             (__v16qi)_mm512_cvtepi32_epi8(__A),
+                                             (__v16qi)_mm_setzero_si128());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
@@ -7630,24 +7629,23 @@ _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_cvtepi32_epi16 (__m512i __A)
 {
-  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
-              (__v16hi) _mm256_undefined_si256 (),
-              (__mmask16) -1);
+  return (__m256i)__builtin_convertvector((__v16si)__A, __v16hi);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
 {
-  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
-              (__v16hi) __O, __M);
+  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
+                                             (__v16hi)_mm512_cvtepi32_epi16(__A),
+                                             (__v16hi)__O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
 {
-  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
-              (__v16hi) _mm256_setzero_si256 (),
-              __M);
+  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
+                                             (__v16hi)_mm512_cvtepi32_epi16(__A),
+                                             (__v16hi)_mm256_setzero_si256());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
@@ -7688,24 +7686,23 @@ _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_cvtepi64_epi32 (__m512i __A)
 {
-  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
-              (__v8si) _mm256_undefined_si256 (),
-              (__mmask8) -1);
+  return (__m256i)__builtin_convertvector((__v8di) __A, __v8si);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
 {
-  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
-              (__v8si) __O, __M);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+                                             (__v8si)_mm512_cvtepi64_epi32(__A),
+                                             (__v8si)__O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
 {
-  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
-              (__v8si) _mm256_setzero_si256 (),
-              __M);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+                                             (__v8si)_mm512_cvtepi64_epi32(__A),
+                                             (__v8si)_mm256_setzero_si256());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
@@ -7717,24 +7714,23 @@ _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_cvtepi64_epi16 (__m512i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
-              (__v8hi) _mm_undefined_si128 (),
-              (__mmask8) -1);
+  return (__m128i)__builtin_convertvector((__v8di)__A, __v8hi);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
-              (__v8hi) __O, __M);
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
+                                             (__v8hi)_mm512_cvtepi64_epi16(__A),
+                                             (__v8hi)__O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
-              (__v8hi) _mm_setzero_si128 (),
-              __M);
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
+                                             (__v8hi)_mm512_cvtepi64_epi16(__A),
+                                             (__v8hi)_mm_setzero_si128());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index e940e2b6853..6c5131ccba9 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -1551,23 +1551,21 @@ _mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_cvtepi16_epi8 (__m256i __A) {
-  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
-              (__v16qi) _mm_setzero_si128(),
-              (__mmask16) -1);
+  return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
-  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
-              (__v16qi) __O,
-              __M);
+  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
+                                             (__v16qi)_mm256_cvtepi16_epi8(__A),
+                                             (__v16qi)__O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) {
-  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
-              (__v16qi) _mm_setzero_si128(),
-              __M);
+  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
+                                             (__v16qi)_mm256_cvtepi16_epi8(__A),
+                                             (__v16qi)_mm_setzero_si128());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 2581c05c5d8..07429953f91 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -7627,24 +7627,23 @@ _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_cvtepi32_epi16 (__m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
-              (__v8hi)_mm_setzero_si128 (),
-              (__mmask8) -1);
+  return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
-              (__v8hi) __O, __M);
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
+                                             (__v8hi)_mm256_cvtepi32_epi16(__A),
+                                             (__v8hi)__O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
-              (__v8hi) _mm_setzero_si128 (),
-              __M);
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
+                                             (__v8hi)_mm256_cvtepi32_epi16(__A),
+                                             (__v8hi)_mm_setzero_si128());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
@@ -7743,24 +7742,23 @@ _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_cvtepi64_epi32 (__m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
-              (__v4si) _mm_undefined_si128(),
-              (__mmask8) -1);
+  return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
-              (__v4si) __O, __M);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
+                                             (__v4si)_mm256_cvtepi64_epi32(__A),
+                                             (__v4si)__O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
-              (__v4si) _mm_setzero_si128 (),
-              __M);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
+                                             (__v4si)_mm256_cvtepi64_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS

