diff options
author | Craig Topper <craig.topper@gmail.com> | 2016-12-11 01:26:52 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2016-12-11 01:26:52 +0000 |
commit | 678b07fe3ca59571f3559ab911305175d7c16b2d (patch) | |
tree | 933a88751eba5d5e5439585e9717774789db7709 /clang/lib | |
parent | 1f1b441267795db464d2200ecfc6706696c2b77f (diff) | |
download | bcm5719-llvm-678b07fe3ca59571f3559ab911305175d7c16b2d.tar.gz bcm5719-llvm-678b07fe3ca59571f3559ab911305175d7c16b2d.zip |
[AVX-512] Remove masking from 512-bit vpermil builtins. The backend now has versions without masking so wrap it with select.
This will allow the backend to constant fold these to generic shuffle vectors, as it does for the 128-bit and 256-bit versions, without having to worry about handling masking.
llvm-svn: 289351
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 54 |
1 files changed, 20 insertions, 34 deletions
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 86999574c8b..71b645230d5 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -6588,61 +6588,47 @@ _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I, (__v16sf)_mm512_setzero_ps()); }) static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_permutevar_pd (__m512d __A, __m512i __C) +_mm512_permutevar_pd(__m512d __A, __m512i __C) { - return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, - (__v8di) __C, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1); + return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) +_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) { - return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, - (__v8di) __C, - (__v8df) __W, - (__mmask8) __U); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_permutevar_pd(__A, __C), + (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C) +_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { - return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, - (__v8di) __C, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_permutevar_pd(__A, __C), + (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_permutevar_ps (__m512 __A, __m512i __C) +_mm512_permutevar_ps(__m512 __A, __m512i __C) { - return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, - (__v16si) __C, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1); + return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C); } static __inline__ __m512 __DEFAULT_FN_ATTRS 
-_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) +_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) { - return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, - (__v16si) __C, - (__v16sf) __W, - (__mmask16) __U); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_permutevar_ps(__A, __C), + (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C) +_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { - return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, - (__v16si) __C, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_permutevar_ps(__A, __C), + (__v16sf)_mm512_setzero_ps()); } static __inline __m512d __DEFAULT_FN_ATTRS |