summary | refs | log | tree | commit | diff | stats
path: root/clang/lib/Headers/avx512dqintrin.h
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/Headers/avx512dqintrin.h')
-rw-r--r-- clang/lib/Headers/avx512dqintrin.h 96
1 file changed, 12 insertions, 84 deletions
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index 9ef78041d1c..fbb4bbce8df 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -1103,16 +1103,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
}
#define _mm512_extractf32x8_ps(A, imm) \
- (__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_undefined_ps(), \
- ((imm) & 1) ? 8 : 0, \
- ((imm) & 1) ? 9 : 1, \
- ((imm) & 1) ? 10 : 2, \
- ((imm) & 1) ? 11 : 3, \
- ((imm) & 1) ? 12 : 4, \
- ((imm) & 1) ? 13 : 5, \
- ((imm) & 1) ? 14 : 6, \
- ((imm) & 1) ? 15 : 7)
+ (__m256)__builtin_ia32_extractf32x8((__v16sf)(__m512)(A), (int)(imm))
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
(__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
@@ -1125,10 +1116,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v8sf)_mm256_setzero_ps())
#define _mm512_extractf64x2_pd(A, imm) \
- (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \
- (__v8df)_mm512_undefined_pd(), \
- 0 + ((imm) & 0x3) * 2, \
- 1 + ((imm) & 0x3) * 2)
+ (__m128d)__builtin_ia32_extractf64x2_512((__v8df)(__m512d)(A), (int)(imm))
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
(__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
@@ -1141,16 +1129,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v2df)_mm_setzero_pd())
#define _mm512_extracti32x8_epi32(A, imm) \
- (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \
- (__v16si)_mm512_undefined_epi32(), \
- ((imm) & 1) ? 8 : 0, \
- ((imm) & 1) ? 9 : 1, \
- ((imm) & 1) ? 10 : 2, \
- ((imm) & 1) ? 11 : 3, \
- ((imm) & 1) ? 12 : 4, \
- ((imm) & 1) ? 13 : 5, \
- ((imm) & 1) ? 14 : 6, \
- ((imm) & 1) ? 15 : 7)
+ (__m256i)__builtin_ia32_extracti32x8((__v16si)(__m512i)(A), (int)(imm))
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
(__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
@@ -1163,10 +1142,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v8si)_mm256_setzero_si256())
#define _mm512_extracti64x2_epi64(A, imm) \
- (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \
- (__v8di)_mm512_undefined_epi32(), \
- 0 + ((imm) & 0x3) * 2, \
- 1 + ((imm) & 0x3) * 2)
+ (__m128i)__builtin_ia32_extracti64x2_512((__v8di)(__m512i)(A), (int)(imm))
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
(__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
@@ -1179,24 +1155,8 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v2di)_mm_setzero_si128())
#define _mm512_insertf32x8(A, B, imm) \
- (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_castps256_ps512((__m256)(B)),\
- ((imm) & 0x1) ? 0 : 16, \
- ((imm) & 0x1) ? 1 : 17, \
- ((imm) & 0x1) ? 2 : 18, \
- ((imm) & 0x1) ? 3 : 19, \
- ((imm) & 0x1) ? 4 : 20, \
- ((imm) & 0x1) ? 5 : 21, \
- ((imm) & 0x1) ? 6 : 22, \
- ((imm) & 0x1) ? 7 : 23, \
- ((imm) & 0x1) ? 16 : 8, \
- ((imm) & 0x1) ? 17 : 9, \
- ((imm) & 0x1) ? 18 : 10, \
- ((imm) & 0x1) ? 19 : 11, \
- ((imm) & 0x1) ? 20 : 12, \
- ((imm) & 0x1) ? 21 : 13, \
- ((imm) & 0x1) ? 22 : 14, \
- ((imm) & 0x1) ? 23 : 15)
+ (__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \
+ (__v8sf)(__m256)(B), (int)(imm))
#define _mm512_mask_insertf32x8(W, U, A, B, imm) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
@@ -1209,16 +1169,8 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v16sf)_mm512_setzero_ps())
#define _mm512_insertf64x2(A, B, imm) \
- (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
- (__v8df)_mm512_castpd128_pd512((__m128d)(B)),\
- (((imm) & 0x3) == 0) ? 8 : 0, \
- (((imm) & 0x3) == 0) ? 9 : 1, \
- (((imm) & 0x3) == 1) ? 8 : 2, \
- (((imm) & 0x3) == 1) ? 9 : 3, \
- (((imm) & 0x3) == 2) ? 8 : 4, \
- (((imm) & 0x3) == 2) ? 9 : 5, \
- (((imm) & 0x3) == 3) ? 8 : 6, \
- (((imm) & 0x3) == 3) ? 9 : 7)
+ (__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \
+ (__v2df)(__m128d)(B), (int)(imm))
#define _mm512_mask_insertf64x2(W, U, A, B, imm) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
@@ -1231,24 +1183,8 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v8df)_mm512_setzero_pd())
#define _mm512_inserti32x8(A, B, imm) \
- (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
- (__v16si)_mm512_castsi256_si512((__m256i)(B)),\
- ((imm) & 0x1) ? 0 : 16, \
- ((imm) & 0x1) ? 1 : 17, \
- ((imm) & 0x1) ? 2 : 18, \
- ((imm) & 0x1) ? 3 : 19, \
- ((imm) & 0x1) ? 4 : 20, \
- ((imm) & 0x1) ? 5 : 21, \
- ((imm) & 0x1) ? 6 : 22, \
- ((imm) & 0x1) ? 7 : 23, \
- ((imm) & 0x1) ? 16 : 8, \
- ((imm) & 0x1) ? 17 : 9, \
- ((imm) & 0x1) ? 18 : 10, \
- ((imm) & 0x1) ? 19 : 11, \
- ((imm) & 0x1) ? 20 : 12, \
- ((imm) & 0x1) ? 21 : 13, \
- ((imm) & 0x1) ? 22 : 14, \
- ((imm) & 0x1) ? 23 : 15)
+ (__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \
+ (__v8si)(__m256i)(B), (int)(imm))
#define _mm512_mask_inserti32x8(W, U, A, B, imm) \
(__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
@@ -1261,16 +1197,8 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v16si)_mm512_setzero_si512())
#define _mm512_inserti64x2(A, B, imm) \
- (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
- (__v8di)_mm512_castsi128_si512((__m128i)(B)),\
- (((imm) & 0x3) == 0) ? 8 : 0, \
- (((imm) & 0x3) == 0) ? 9 : 1, \
- (((imm) & 0x3) == 1) ? 8 : 2, \
- (((imm) & 0x3) == 1) ? 9 : 3, \
- (((imm) & 0x3) == 2) ? 8 : 4, \
- (((imm) & 0x3) == 2) ? 9 : 5, \
- (((imm) & 0x3) == 3) ? 8 : 6, \
- (((imm) & 0x3) == 3) ? 9 : 7)
+ (__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \
+ (__v2di)(__m128i)(B), (int)(imm))
#define _mm512_mask_inserti64x2(W, U, A, B, imm) \
(__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
OpenPOWER on IntegriCloud