diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-06-13 07:19:28 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-06-13 07:19:28 +0000 |
| commit | 2527c378c67bc274c0a93bb0fabbbabd95405699 (patch) | |
| tree | 07f2e72e3915bd5853bc552bf299ea3c0ec1fd16 /clang/lib | |
| parent | 3829d258ee1824dd9ad49b4b2b45cb72478bfa96 (diff) | |
| download | bcm5719-llvm-2527c378c67bc274c0a93bb0fabbbabd95405699.tar.gz bcm5719-llvm-2527c378c67bc274c0a93bb0fabbbabd95405699.zip | |
[X86] Remove masking from avx512vbmi2 concat and shift by immediate builtins. Use select builtins instead.
llvm-svn: 334577
Diffstat (limited to 'clang/lib')
| -rw-r--r-- | clang/lib/Headers/avx512vbmi2intrin.h | 152 | ||||
| -rw-r--r-- | clang/lib/Headers/avx512vlvbmi2intrin.h | 302 | ||||
| -rw-r--r-- | clang/lib/Sema/SemaChecking.cpp | 36 |
3 files changed, 182 insertions, 308 deletions
diff --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h index e0ada4c16f3..01a75a70bb6 100644 --- a/clang/lib/Headers/avx512vbmi2intrin.h +++ b/clang/lib/Headers/avx512vbmi2intrin.h @@ -142,131 +142,89 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) __U); } +#define _mm512_shldi_epi64(A, B, I) \ + (__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(I)) + #define _mm512_mask_shldi_epi64(S, U, A, B, I) \ - (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (int)(I), \ - (__v8di)(__m512i)(S), \ - (__mmask8)(U)) + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ + (__v8di)(__m512i)(S)) #define _mm512_maskz_shldi_epi64(U, A, B, I) \ - (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (int)(I), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U)) + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ + (__v8di)_mm512_setzero_si512()) -#define _mm512_shldi_epi64(A, B, I) \ - (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (int)(I), \ - (__v8di)_mm512_undefined_epi32(), \ - (__mmask8)-1) +#define _mm512_shldi_epi32(A, B, I) \ + (__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (int)(I)) #define _mm512_mask_shldi_epi32(S, U, A, B, I) \ - (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (int)(I), \ - (__v16si)(__m512i)(S), \ - (__mmask16)(U)) + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ + (__v16si)(__m512i)(S)) #define _mm512_maskz_shldi_epi32(U, A, B, I) \ - (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (int)(I), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)) + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ + (__v16si)_mm512_setzero_si512()) -#define _mm512_shldi_epi32(A, B, I) \ - (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (int)(I), \ - (__v16si)_mm512_undefined_epi32(), \ - (__mmask16)-1) +#define _mm512_shldi_epi16(A, B, I) \ + (__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \ + (__v32hi)(__m512i)(B), (int)(I)) #define _mm512_mask_shldi_epi16(S, U, A, B, I) \ - (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), \ - (int)(I), \ - (__v32hi)(__m512i)(S), \ - (__mmask32)(U)) + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ + (__v32hi)(__m512i)(S)) #define _mm512_maskz_shldi_epi16(U, A, B, I) \ - (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), \ - (int)(I), \ - (__v32hi)_mm512_setzero_si512(), \ - (__mmask32)(U)) + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ + (__v32hi)_mm512_setzero_si512()) -#define _mm512_shldi_epi16(A, B, I) \ - (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), \ - (int)(I), \ - (__v32hi)_mm512_undefined_epi32(), \ - (__mmask32)-1) +#define _mm512_shrdi_epi64(A, B, I) \ + (__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(I)) #define _mm512_mask_shrdi_epi64(S, U, A, B, I) \ - (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (int)(I), \ - (__v8di)(__m512i)(S), \ - (__mmask8)(U)) + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ + (__v8di)(__m512i)(S)) #define _mm512_maskz_shrdi_epi64(U, A, B, I) \ - (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (int)(I), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U)) + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ + (__v8di)_mm512_setzero_si512()) -#define _mm512_shrdi_epi64(A, B, I) \ - (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (int)(I), \ - (__v8di)_mm512_undefined_epi32(), \ - (__mmask8)-1) +#define _mm512_shrdi_epi32(A, B, I) \ + (__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (int)(I)) #define _mm512_mask_shrdi_epi32(S, U, A, B, I) \ - (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (int)(I), \ - (__v16si)(__m512i)(S), \ - (__mmask16)(U)) + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ + (__v16si)(__m512i)(S)) #define _mm512_maskz_shrdi_epi32(U, A, B, I) \ - (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (int)(I), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)) + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ + (__v16si)_mm512_setzero_si512()) -#define _mm512_shrdi_epi32(A, B, I) \ - (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (int)(I), \ - (__v16si)_mm512_undefined_epi32(), \ - (__mmask16)-1) +#define _mm512_shrdi_epi16(A, B, I) \ + (__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \ + (__v32hi)(__m512i)(B), (int)(I)) #define _mm512_mask_shrdi_epi16(S, U, A, B, I) \ - (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), \ - (int)(I), \ - (__v32hi)(__m512i)(S), \ - (__mmask32)(U)) + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ + (__v32hi)(__m512i)(S)) #define _mm512_maskz_shrdi_epi16(U, A, B, I) \ - (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), \ - (int)(I), \ - (__v32hi)_mm512_setzero_si512(), \ - (__mmask32)(U)) - -#define _mm512_shrdi_epi16(A, B, I) \ - (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), \ - (int)(I), \ - (__v32hi)_mm512_undefined_epi32(), \ - (__mmask32)-1) + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ + (__v32hi)_mm512_setzero_si512()) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) diff --git a/clang/lib/Headers/avx512vlvbmi2intrin.h b/clang/lib/Headers/avx512vlvbmi2intrin.h index 6dde1e9643e..38ef9b0f849 100644 --- a/clang/lib/Headers/avx512vlvbmi2intrin.h +++ b/clang/lib/Headers/avx512vlvbmi2intrin.h @@ -251,257 +251,173 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) __U); } +#define _mm256_shldi_epi64(A, B, I) \ + (__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(I)) + #define _mm256_mask_shldi_epi64(S, U, A, B, I) \ - (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (int)(I), \ - (__v4di)(__m256i)(S), \ - (__mmask8)(U)) + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ + (__v4di)(__m256i)(S)) #define _mm256_maskz_shldi_epi64(U, A, B, I) \ - (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (int)(I), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)(U)) + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ + (__v4di)_mm256_setzero_si256()) -#define _mm256_shldi_epi64(A, B, I) \ - (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (int)(I), \ - (__v4di)_mm256_undefined_si256(), \ - (__mmask8)-1) +#define _mm_shldi_epi64(A, B, I) \ + (__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (int)(I)) #define _mm_mask_shldi_epi64(S, U, A, B, I) \ - (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (int)(I), \ - (__v2di)(__m128i)(S), \ - (__mmask8)(U)) + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shldi_epi64((A), (B), (I)), \ + (__v2di)(__m128i)(S)) #define _mm_maskz_shldi_epi64(U, A, B, I) \ - (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (int)(I), \ - (__v2di)_mm_setzero_si128(), \ - (__mmask8)(U)) + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shldi_epi64((A), (B), (I)), \ + (__v2di)_mm_setzero_si128()) -#define _mm_shldi_epi64(A, B, I) \ - (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (int)(I), \ - (__v2di)_mm_undefined_si128(), \ - (__mmask8)-1) +#define _mm256_shldi_epi32(A, B, I) \ + (__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), (int)(I)) #define _mm256_mask_shldi_epi32(S, U, A, B, I) \ - (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (int)(I), \ - (__v8si)(__m256i)(S), \ - (__mmask8)(U)) + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ + (__v8si)(__m256i)(S)) #define _mm256_maskz_shldi_epi32(U, A, B, I) \ - (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (int)(I), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U)) + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ + (__v8si)_mm256_setzero_si256()) -#define _mm256_shldi_epi32(A, B, I) \ - (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (int)(I), \ - (__v8si)_mm256_undefined_si256(), \ - (__mmask8)-1) +#define _mm_shldi_epi32(A, B, I) \ + (__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), (int)(I)) #define _mm_mask_shldi_epi32(S, U, A, B, I) \ - (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (int)(I), \ - (__v4si)(__m128i)(S), \ - (__mmask8)(U)) + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shldi_epi32((A), (B), (I)), \ + (__v4si)(__m128i)(S)) #define _mm_maskz_shldi_epi32(U, A, B, I) \ - (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (int)(I), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(U)) + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shldi_epi32((A), (B), (I)), \ + (__v4si)_mm_setzero_si128()) -#define _mm_shldi_epi32(A, B, I) \ - (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (int)(I), \ - (__v4si)_mm_undefined_si128(), \ - (__mmask8)-1) +#define _mm256_shldi_epi16(A, B, I) \ + (__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \ + (__v16hi)(__m256i)(B), (int)(I)) #define _mm256_mask_shldi_epi16(S, U, A, B, I) \ - (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), \ - (int)(I), \ - (__v16hi)(__m256i)(S), \ - (__mmask16)(U)) + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ + (__v16hi)(__m256i)(S)) #define _mm256_maskz_shldi_epi16(U, A, B, I) \ - (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), \ - (int)(I), \ - (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(U)) + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ + (__v16hi)_mm256_setzero_si256()) -#define _mm256_shldi_epi16(A, B, I) \ - (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), \ - (int)(I), \ - (__v16hi)_mm256_undefined_si256(), \ - (__mmask16)-1) +#define _mm_shldi_epi16(A, B, I) \ + (__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \ + (__v8hi)(__m128i)(B), (int)(I)) #define _mm_mask_shldi_epi16(S, U, A, B, I) \ - (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), \ - (int)(I), \ - (__v8hi)(__m128i)(S), \ - (__mmask8)(U)) + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ + (__v8hi)(__m128i)(S)) #define _mm_maskz_shldi_epi16(U, A, B, I) \ - (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), \ - (int)(I), \ - (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(U)) + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ + (__v8hi)_mm_setzero_si128()) -#define _mm_shldi_epi16(A, B, I) \ - (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), \ - (int)(I), \ - (__v8hi)_mm_undefined_si128(), \ - (__mmask8)-1) +#define _mm256_shrdi_epi64(A, B, I) \ + (__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(I)) #define _mm256_mask_shrdi_epi64(S, U, A, B, I) \ - (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (int)(I), \ - (__v4di)(__m256i)(S), \ - (__mmask8)(U)) + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ + (__v4di)(__m256i)(S)) #define _mm256_maskz_shrdi_epi64(U, A, B, I) \ - (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (int)(I), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)(U)) + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ + (__v4di)_mm256_setzero_si256()) -#define _mm256_shrdi_epi64(A, B, I) \ - (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (int)(I), \ - (__v4di)_mm256_undefined_si256(), \ - (__mmask8)-1) +#define _mm_shrdi_epi64(A, B, I) \ + (__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (int)(I)) #define _mm_mask_shrdi_epi64(S, U, A, B, I) \ - (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (int)(I), \ - (__v2di)(__m128i)(S), \ - (__mmask8)(U)) + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ + (__v2di)(__m128i)(S)) #define _mm_maskz_shrdi_epi64(U, A, B, I) \ - (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (int)(I), \ - (__v2di)_mm_setzero_si128(), \ - (__mmask8)(U)) + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ + (__v2di)_mm_setzero_si128()) -#define _mm_shrdi_epi64(A, B, I) \ - (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (int)(I), \ - (__v2di)_mm_undefined_si128(), \ - (__mmask8)-1) +#define _mm256_shrdi_epi32(A, B, I) \ + (__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), (int)(I)) #define _mm256_mask_shrdi_epi32(S, U, A, B, I) \ - (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (int)(I), \ - (__v8si)(__m256i)(S), \ - (__mmask8)(U)) + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ + (__v8si)(__m256i)(S)) #define _mm256_maskz_shrdi_epi32(U, A, B, I) \ - (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (int)(I), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U)) + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ + (__v8si)_mm256_setzero_si256()) -#define _mm256_shrdi_epi32(A, B, I) \ - (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (int)(I), \ - (__v8si)_mm256_undefined_si256(), \ - (__mmask8)-1) +#define _mm_shrdi_epi32(A, B, I) \ + (__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), (int)(I)) #define _mm_mask_shrdi_epi32(S, U, A, B, I) \ - (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (int)(I), \ - (__v4si)(__m128i)(S), \ - (__mmask8)(U)) + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ + (__v4si)(__m128i)(S)) #define _mm_maskz_shrdi_epi32(U, A, B, I) \ - (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (int)(I), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(U)) + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ + (__v4si)_mm_setzero_si128()) -#define _mm_shrdi_epi32(A, B, I) \ - (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (int)(I), \ - (__v4si)_mm_undefined_si128(), \ - (__mmask8)-1) +#define _mm256_shrdi_epi16(A, B, I) \ + (__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \ + (__v16hi)(__m256i)(B), (int)(I)) #define _mm256_mask_shrdi_epi16(S, U, A, B, I) \ - (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), \ - (int)(I), \ - (__v16hi)(__m256i)(S), \ - (__mmask16)(U)) + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ + (__v16hi)(__m256i)(S)) #define _mm256_maskz_shrdi_epi16(U, A, B, I) \ - (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), \ - (int)(I), \ - (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(U)) + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ + (__v16hi)_mm256_setzero_si256()) -#define _mm256_shrdi_epi16(A, B, I) \ - (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), \ - (int)(I), \ - (__v16hi)_mm256_undefined_si256(), \ - (__mmask16)-1) +#define _mm_shrdi_epi16(A, B, I) \ + (__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \ + (__v8hi)(__m128i)(B), (int)(I)) #define _mm_mask_shrdi_epi16(S, U, A, B, I) \ - (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), \ - (int)(I), \ - (__v8hi)(__m128i)(S), \ - (__mmask8)(U)) + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ + (__v8hi)(__m128i)(S)) #define _mm_maskz_shrdi_epi16(U, A, B, I) \ - (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), \ - (int)(I), \ - (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(U)) - -#define _mm_shrdi_epi16(A, B, I) \ - (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), \ - (int)(I), \ - (__v8hi)_mm_undefined_si128(), \ - (__mmask8)-1) + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ + (__v8hi)_mm_setzero_si128()) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index d5945ef6f62..6a1cbdf6210 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2841,24 +2841,24 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_dbpsadbw128: case X86::BI__builtin_ia32_dbpsadbw256: case X86::BI__builtin_ia32_dbpsadbw512: - case X86::BI__builtin_ia32_vpshldd128_mask: - case X86::BI__builtin_ia32_vpshldd256_mask: - case X86::BI__builtin_ia32_vpshldd512_mask: - case X86::BI__builtin_ia32_vpshldq128_mask: - case X86::BI__builtin_ia32_vpshldq256_mask: - case X86::BI__builtin_ia32_vpshldq512_mask: - case X86::BI__builtin_ia32_vpshldw128_mask: - case X86::BI__builtin_ia32_vpshldw256_mask: - case X86::BI__builtin_ia32_vpshldw512_mask: - case X86::BI__builtin_ia32_vpshrdd128_mask: - case X86::BI__builtin_ia32_vpshrdd256_mask: - case X86::BI__builtin_ia32_vpshrdd512_mask: - case X86::BI__builtin_ia32_vpshrdq128_mask: - case X86::BI__builtin_ia32_vpshrdq256_mask: - case X86::BI__builtin_ia32_vpshrdq512_mask: - case X86::BI__builtin_ia32_vpshrdw128_mask: - case X86::BI__builtin_ia32_vpshrdw256_mask: - case X86::BI__builtin_ia32_vpshrdw512_mask: + case X86::BI__builtin_ia32_vpshldd128: + case X86::BI__builtin_ia32_vpshldd256: + case X86::BI__builtin_ia32_vpshldd512: + case X86::BI__builtin_ia32_vpshldq128: + case X86::BI__builtin_ia32_vpshldq256: + case X86::BI__builtin_ia32_vpshldq512: + case X86::BI__builtin_ia32_vpshldw128: + case X86::BI__builtin_ia32_vpshldw256: + case X86::BI__builtin_ia32_vpshldw512: + case X86::BI__builtin_ia32_vpshrdd128: + case X86::BI__builtin_ia32_vpshrdd256: + case X86::BI__builtin_ia32_vpshrdd512: + case X86::BI__builtin_ia32_vpshrdq128: + case X86::BI__builtin_ia32_vpshrdq256: + case X86::BI__builtin_ia32_vpshrdq512: + case X86::BI__builtin_ia32_vpshrdw128: + case X86::BI__builtin_ia32_vpshrdw256: + case X86::BI__builtin_ia32_vpshrdw512: i = 2; l = 0; u = 255; break; case X86::BI__builtin_ia32_fixupimmpd512_mask: |

