diff options
| author | Craig Topper <craig.topper@gmail.com> | 2016-06-11 12:50:19 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-06-11 12:50:19 +0000 |
| commit | 7cc9263ec2fe37a2c673eb02254bb3124036273e (patch) | |
| tree | 4f1ee5686bf1fafc3d7c43cf9c4575795a77b226 /clang/lib | |
| parent | 26d5b873165269d14576a2d896e2b4458919748f (diff) | |
| download | bcm5719-llvm-7cc9263ec2fe37a2c673eb02254bb3124036273e.tar.gz bcm5719-llvm-7cc9263ec2fe37a2c673eb02254bb3124036273e.zip | |
[AVX512] Implement masked and 512-bit pshufd intrinsics directly with __builtin_shufflevector and __builtin_ia32_select.
llvm-svn: 272467
Diffstat (limited to 'clang/lib')
| -rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 33 | ||||
| -rw-r--r-- | clang/lib/Headers/avx512vlintrin.h | 25 | ||||
| -rw-r--r-- | clang/lib/Sema/SemaChecking.cpp | 3 |
3 files changed, 36 insertions, 25 deletions
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index b1f0ebf50f1..90b87b2586c 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -9052,19 +9052,34 @@ _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) } #define _mm512_shuffle_epi32(A, I) __extension__ ({ \ - (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \ - (__v16si)_mm512_undefined_epi32(), \ - (__mmask16)-1); }) + (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ + (__v16si)_mm512_setzero_si512(), \ + 0 + (((I) & 0x03) >> 0), \ + 0 + (((I) & 0x0c) >> 2), \ + 0 + (((I) & 0x30) >> 4), \ + 0 + (((I) & 0xc0) >> 6), \ + 4 + (((I) & 0x03) >> 0), \ + 4 + (((I) & 0x0c) >> 2), \ + 4 + (((I) & 0x30) >> 4), \ + 4 + (((I) & 0xc0) >> 6), \ + 8 + (((I) & 0x03) >> 0), \ + 8 + (((I) & 0x0c) >> 2), \ + 8 + (((I) & 0x30) >> 4), \ + 8 + (((I) & 0xc0) >> 6), \ + 12 + (((I) & 0x03) >> 0), \ + 12 + (((I) & 0x0c) >> 2), \ + 12 + (((I) & 0x30) >> 4), \ + 12 + (((I) & 0xc0) >> 6)); }) #define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \ - (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \ - (__v16si)(__m512i)(W), \ - (__mmask16)(U)); }) + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shuffle_epi32((A), (I)), \ + (__v16si)(__m512i)(W)); }) #define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \ - (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)); }) + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shuffle_epi32((A), (I)), \ + (__v16si)_mm512_setzero_si512()); }) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 62e5e863aea..830be8d1590 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ 
b/clang/lib/Headers/avx512vlintrin.h @@ -9257,25 +9257,24 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) } #define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\ - (__m256i)__builtin_ia32_pshufd256_mask((__v8si)(__m256i)(A), (int)(I), \ - (__v8si)(__m256i)(W), \ - (__mmask8)(U)); }) + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shuffle_epi32((A), (I)), \ + (__v8si)(__m256i)(W)); }) #define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\ - (__m256i)__builtin_ia32_pshufd256_mask((__v8si)(__m256i)(A), (int)(I), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U)); }) + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shuffle_epi32((A), (I)), \ + (__v8si)_mm256_setzero_si256()); }) #define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\ - (__m128i)__builtin_ia32_pshufd128_mask((__v4si)(__m128i)(A), (int)(I), \ - (__v4si)(__m128i)(W), \ - (__mmask8)(U)); }) + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shuffle_epi32((A), (I)), \ + (__v4si)(__m128i)(W)); }) #define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\ - (__m128i)__builtin_ia32_pshufd128_mask((__v4si)(__m128i)(A), (int)(I), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(U)); }) - + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shuffle_epi32((A), (I)), \ + (__v4si)_mm_setzero_si128()); }) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 6f944e0d928..6bd910ed586 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1566,9 +1566,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_fpclasspd512_mask: case X86::BI__builtin_ia32_fpclasssd_mask: case X86::BI__builtin_ia32_fpclassss_mask: - case X86::BI__builtin_ia32_pshufd512_mask: - case 
X86::BI__builtin_ia32_pshufd256_mask: - case X86::BI__builtin_ia32_pshufd128_mask: i = 1; l = 0; u = 255; break; case X86::BI__builtin_ia32_palignr:

