diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-05-14 17:50:40 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-05-14 17:50:40 +0000 |
| commit | 25de41cfbcb6e374a244ac4c9f76c74e669459fd (patch) | |
| tree | 58ea32aba0123d940e60cdde5f3b37db242c6332 /clang/lib/Headers/avx512vlintrin.h | |
| parent | 64a2ea41eaca32a6ba66db6b699ca33b2eee095b (diff) | |
| download | bcm5719-llvm-25de41cfbcb6e374a244ac4c9f76c74e669459fd.tar.gz bcm5719-llvm-25de41cfbcb6e374a244ac4c9f76c74e669459fd.zip | |
[X86] Use __builtin_convertvector to replace some of the avx512 truncate builtins.
As long as the destination type is a 256-bit or 128-bit vector with the same number of elements as the source, we can use __builtin_convertvector to directly generate a trunc IR instruction, which is handled natively by the backend.
Differential Revision: https://reviews.llvm.org/D46742
llvm-svn: 332266
Diffstat (limited to 'clang/lib/Headers/avx512vlintrin.h')
| -rw-r--r-- | clang/lib/Headers/avx512vlintrin.h | 30 |
1 files changed, 14 insertions, 16 deletions
```diff
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 2581c05c5d8..07429953f91 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -7627,24 +7627,23 @@ _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_cvtepi32_epi16 (__m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
-              (__v8hi)_mm_setzero_si128 (),
-              (__mmask8) -1);
+  return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
-              (__v8hi) __O, __M);
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
+                                             (__v8hi)_mm256_cvtepi32_epi16(__A),
+                                             (__v8hi)__O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
-              (__v8hi) _mm_setzero_si128 (),
-              __M);
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
+                                             (__v8hi)_mm256_cvtepi32_epi16(__A),
+                                             (__v8hi)_mm_setzero_si128());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
@@ -7743,24 +7742,23 @@ _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_cvtepi64_epi32 (__m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
-              (__v4si) _mm_undefined_si128(),
-              (__mmask8) -1);
+  return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
-              (__v4si) __O, __M);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
+                                             (__v4si)_mm256_cvtepi64_epi32(__A),
+                                             (__v4si)__O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
-              (__v4si) _mm_setzero_si128 (),
-              __M);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
+                                             (__v4si)_mm256_cvtepi64_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
```

