diff options
author | Craig Topper <craig.topper@intel.com> | 2018-07-07 17:03:34 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-07-07 17:03:34 +0000 |
commit | 218da62091363ebafb799a99d2ef378cf98c6bd7 (patch) | |
tree | 92ef435a418c8bb642775272788f4d9873233efa | |
parent | 5cbeeedd27a4d766652694b9dcb1084b2d80ae2b (diff) | |
download | bcm5719-llvm-218da62091363ebafb799a99d2ef378cf98c6bd7.tar.gz bcm5719-llvm-218da62091363ebafb799a99d2ef378cf98c6bd7.zip |
[X86] Change _mm512_shuffle_pd and _mm512_shuffle_ps to use target specific shuffle builtins instead of generic __builtin_shufflevector.
I recently added the builtins for 128, 256, and 512 bits, but it looks like I failed to convert these intrinsics to use the 512-bit ones.
llvm-svn: 336488
-rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 32 |
1 file changed, 4 insertions, 28 deletions
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index cbee4413be4..a3ca7e730a1 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -6699,16 +6699,8 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
                                          (__v8di)_mm512_setzero_si512())
 
 #define _mm512_shuffle_pd(A, B, M) \
-  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
-                                   (__v8df)(__m512d)(B), \
-                                   0 + (((M) >> 0) & 0x1), \
-                                   8 + (((M) >> 1) & 0x1), \
-                                   2 + (((M) >> 2) & 0x1), \
-                                   10 + (((M) >> 3) & 0x1), \
-                                   4 + (((M) >> 4) & 0x1), \
-                                   12 + (((M) >> 5) & 0x1), \
-                                   6 + (((M) >> 6) & 0x1), \
-                                   14 + (((M) >> 7) & 0x1))
+  (__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
+                                    (__v8df)(__m512d)(B), (int)(M))
 
 #define _mm512_mask_shuffle_pd(W, U, A, B, M) \
   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
@@ -6721,24 +6713,8 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
                                        (__v8df)_mm512_setzero_pd())
 
 #define _mm512_shuffle_ps(A, B, M) \
-  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
-                                  (__v16sf)(__m512)(B), \
-                                  0 + (((M) >> 0) & 0x3), \
-                                  0 + (((M) >> 2) & 0x3), \
-                                  16 + (((M) >> 4) & 0x3), \
-                                  16 + (((M) >> 6) & 0x3), \
-                                  4 + (((M) >> 0) & 0x3), \
-                                  4 + (((M) >> 2) & 0x3), \
-                                  20 + (((M) >> 4) & 0x3), \
-                                  20 + (((M) >> 6) & 0x3), \
-                                  8 + (((M) >> 0) & 0x3), \
-                                  8 + (((M) >> 2) & 0x3), \
-                                  24 + (((M) >> 4) & 0x3), \
-                                  24 + (((M) >> 6) & 0x3), \
-                                  12 + (((M) >> 0) & 0x3), \
-                                  12 + (((M) >> 2) & 0x3), \
-                                  28 + (((M) >> 4) & 0x3), \
-                                  28 + (((M) >> 6) & 0x3))
+  (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
+                                   (__v16sf)(__m512)(B), (int)(M))
 
 #define _mm512_mask_shuffle_ps(W, U, A, B, M) \
   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \