diff options
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 46 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vbmi2intrin.h | 158 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vlvbmi2intrin.h | 312 |
3 files changed, 244 insertions, 272 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 93484f82c30..ca7b4691ff5 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10999,6 +10999,52 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pternlogq256_maskz: return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops); + case X86::BI__builtin_ia32_vpshldd128: + case X86::BI__builtin_ia32_vpshldd256: + case X86::BI__builtin_ia32_vpshldd512: + case X86::BI__builtin_ia32_vpshldq128: + case X86::BI__builtin_ia32_vpshldq256: + case X86::BI__builtin_ia32_vpshldq512: + case X86::BI__builtin_ia32_vpshldw128: + case X86::BI__builtin_ia32_vpshldw256: + case X86::BI__builtin_ia32_vpshldw512: + return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false); + + case X86::BI__builtin_ia32_vpshrdd128: + case X86::BI__builtin_ia32_vpshrdd256: + case X86::BI__builtin_ia32_vpshrdd512: + case X86::BI__builtin_ia32_vpshrdq128: + case X86::BI__builtin_ia32_vpshrdq256: + case X86::BI__builtin_ia32_vpshrdq512: + case X86::BI__builtin_ia32_vpshrdw128: + case X86::BI__builtin_ia32_vpshrdw256: + case X86::BI__builtin_ia32_vpshrdw512: + // Ops 0 and 1 are swapped. + return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); + + case X86::BI__builtin_ia32_vpshldvd128: + case X86::BI__builtin_ia32_vpshldvd256: + case X86::BI__builtin_ia32_vpshldvd512: + case X86::BI__builtin_ia32_vpshldvq128: + case X86::BI__builtin_ia32_vpshldvq256: + case X86::BI__builtin_ia32_vpshldvq512: + case X86::BI__builtin_ia32_vpshldvw128: + case X86::BI__builtin_ia32_vpshldvw256: + case X86::BI__builtin_ia32_vpshldvw512: + return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false); + + case X86::BI__builtin_ia32_vpshrdvd128: + case X86::BI__builtin_ia32_vpshrdvd256: + case X86::BI__builtin_ia32_vpshrdvd512: + case X86::BI__builtin_ia32_vpshrdvq128: + case X86::BI__builtin_ia32_vpshrdvq256: + case X86::BI__builtin_ia32_vpshrdvq512: + case X86::BI__builtin_ia32_vpshrdvw128: + case X86::BI__builtin_ia32_vpshrdvw256: + case X86::BI__builtin_ia32_vpshrdvw512: + // Ops 0 and 1 are swapped. + return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); + // 3DNow! case X86::BI__builtin_ia32_pswapdsf: case X86::BI__builtin_ia32_pswapdsi: { diff --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h index d2a58094fd0..53242524293 100644 --- a/clang/lib/Headers/avx512vbmi2intrin.h +++ b/clang/lib/Headers/avx512vbmi2intrin.h @@ -227,167 +227,141 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) (__v32hi)_mm512_setzero_si512()) static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) +_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, - (__v8di) __A, - (__v8di) __B, - __U); + return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B, + (__v8di)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) +_mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S, - (__v8di) __A, - (__v8di) __B, - __U); + return (__m512i)__builtin_ia32_selectq_512(__U, + (__v8di)_mm512_shldv_epi64(__A, __B, __C), + (__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B) +_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, - (__v8di) __A, - (__v8di) __B, - (__mmask8) -1); + return (__m512i)__builtin_ia32_selectq_512(__U, + (__v8di)_mm512_shldv_epi64(__A, __B, __C), + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - __U); + return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B, + (__v16si)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +_mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_shldv_epi32(__A, __B, __C), + (__v16si)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B) +_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) -1); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_shldv_epi32(__A, __B, __C), + (__v16si)_mm512_setzero_si512()); } - static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) +_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, - (__v32hi) __A, - (__v32hi) __B, - __U); + return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B, + (__v32hi)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) +_mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S, - (__v32hi) __A, - (__v32hi) __B, - __U); + return (__m512i)__builtin_ia32_selectw_512(__U, + (__v32hi)_mm512_shldv_epi16(__A, __B, __C), + (__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B) +_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, - (__v32hi) __A, - (__v32hi) __B, - (__mmask32) -1); + return (__m512i)__builtin_ia32_selectw_512(__U, + (__v32hi)_mm512_shldv_epi16(__A, __B, __C), + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) +_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, - (__v8di) __A, - (__v8di) __B, - __U); + return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B, + (__v8di)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) +_mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S, - (__v8di) __A, - (__v8di) __B, - __U); + return (__m512i)__builtin_ia32_selectq_512(__U, + (__v8di)_mm512_shrdv_epi64(__A, __B, __C), + (__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B) +_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, - (__v8di) __A, - (__v8di) __B, - (__mmask8) -1); + return (__m512i)__builtin_ia32_selectq_512(__U, + (__v8di)_mm512_shrdv_epi64(__A, __B, __C), + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - __U); + return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B, + (__v16si)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +_mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - __U); + return (__m512i) __builtin_ia32_selectd_512(__U, + (__v16si)_mm512_shrdv_epi32(__A, __B, __C), + (__v16si)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B) +_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) -1); + return (__m512i) __builtin_ia32_selectd_512(__U, + (__v16si)_mm512_shrdv_epi32(__A, __B, __C), + (__v16si)_mm512_setzero_si512()); } - static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) +_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, - (__v32hi) __A, - (__v32hi) __B, - __U); + return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B, + (__v32hi)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) +_mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S, - (__v32hi) __A, - (__v32hi) __B, - __U); + return (__m512i)__builtin_ia32_selectw_512(__U, + (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), + (__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B) +_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) { - return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, - (__v32hi) __A, - (__v32hi) __B, - (__mmask32) -1); + return (__m512i)__builtin_ia32_selectw_512(__U, + (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), + (__v32hi)_mm512_setzero_si512()); } diff --git a/clang/lib/Headers/avx512vlvbmi2intrin.h b/clang/lib/Headers/avx512vlvbmi2intrin.h index baaf5654631..632d14fb55a 100644 --- a/clang/lib/Headers/avx512vlvbmi2intrin.h +++ b/clang/lib/Headers/avx512vlvbmi2intrin.h @@ -421,327 +421,279 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) (__v8hi)_mm_setzero_si128()) static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +_mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, - (__v4di) __A, - (__v4di) __B, - __U); + return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B, + (__v4di)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +_mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S, - (__v4di) __A, - (__v4di) __B, - __U); + return (__m256i)__builtin_ia32_selectq_256(__U, + (__v4di)_mm256_shldv_epi64(__A, __B, __C), + (__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) +_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, - (__v4di) __A, - (__v4di) __B, - (__mmask8) -1); + return (__m256i)__builtin_ia32_selectq_256(__U, + (__v4di)_mm256_shldv_epi64(__A, __B, __C), + (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, - (__v2di) __A, - (__v2di) __B, - __U); + return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B, + (__v2di)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S, - (__v2di) __A, - (__v2di) __B, - __U); + return (__m128i)__builtin_ia32_selectq_128(__U, + (__v2di)_mm_shldv_epi64(__A, __B, __C), + (__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) +_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, - (__v2di) __A, - (__v2di) __B, - (__mmask8) -1); + return (__m128i)__builtin_ia32_selectq_128(__U, + (__v2di)_mm_shldv_epi64(__A, __B, __C), + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +_mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - __U); + return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B, + (__v8si)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +_mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - __U); + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_shldv_epi32(__A, __B, __C), + (__v8si)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) +_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) -1); + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_shldv_epi32(__A, __B, __C), + (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - __U); + return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B, + (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - __U); + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_shldv_epi32(__A, __B, __C), + (__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) +_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) -1); + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_shldv_epi32(__A, __B, __C), + (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) +_mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, - (__v16hi) __A, - (__v16hi) __B, - __U); + return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B, + (__v16hi)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) +_mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S, - (__v16hi) __A, - (__v16hi) __B, - __U); + return (__m256i)__builtin_ia32_selectw_256(__U, + (__v16hi)_mm256_shldv_epi16(__A, __B, __C), + (__v16hi)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) +_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, - (__v16hi) __A, - (__v16hi) __B, - (__mmask16) -1); + return (__m256i)__builtin_ia32_selectw_256(__U, + (__v16hi)_mm256_shldv_epi16(__A, __B, __C), + (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, - (__v8hi) __A, - (__v8hi) __B, - __U); + return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B, + (__v8hi)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S, - (__v8hi) __A, - (__v8hi) __B, - __U); + return (__m128i)__builtin_ia32_selectw_128(__U, + (__v8hi)_mm_shldv_epi16(__A, __B, __C), + (__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) +_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, - (__v8hi) __A, - (__v8hi) __B, - (__mmask8) -1); + return (__m128i)__builtin_ia32_selectw_128(__U, + (__v8hi)_mm_shldv_epi16(__A, __B, __C), + (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +_mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, - (__v4di) __A, - (__v4di) __B, - __U); + return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B, + (__v4di)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +_mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S, - (__v4di) __A, - (__v4di) __B, - __U); + return (__m256i)__builtin_ia32_selectq_256(__U, + (__v4di)_mm256_shrdv_epi64(__A, __B, __C), + (__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) +_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, - (__v4di) __A, - (__v4di) __B, - (__mmask8) -1); + return (__m256i)__builtin_ia32_selectq_256(__U, + (__v4di)_mm256_shrdv_epi64(__A, __B, __C), + (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, - (__v2di) __A, - (__v2di) __B, - __U); + return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B, + (__v2di)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S, - (__v2di) __A, - (__v2di) __B, - __U); + return (__m128i)__builtin_ia32_selectq_128(__U, + (__v2di)_mm_shrdv_epi64(__A, __B, __C), + (__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) +_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, - (__v2di) __A, - (__v2di) __B, - (__mmask8) -1); + return (__m128i)__builtin_ia32_selectq_128(__U, + (__v2di)_mm_shrdv_epi64(__A, __B, __C), + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +_mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - __U); + return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B, + (__v8si)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +_mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - __U); + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_shrdv_epi32(__A, __B, __C), + (__v8si)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) +_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) -1); + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_shrdv_epi32(__A, __B, __C), + (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - __U); + return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B, + (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - __U); + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_shrdv_epi32(__A, __B, __C), + (__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) +_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) -1); + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_shrdv_epi32(__A, __B, __C), + (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) +_mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, - (__v16hi) __A, - (__v16hi) __B, - __U); + return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B, + (__v16hi)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) +_mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S, - (__v16hi) __A, - (__v16hi) __B, - __U); + return (__m256i)__builtin_ia32_selectw_256(__U, + (__v16hi)_mm256_shrdv_epi16(__A, __B, __C), + (__v16hi)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) +_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) { - return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, - (__v16hi) __A, - (__v16hi) __B, - (__mmask16) -1); + return (__m256i)__builtin_ia32_selectw_256(__U, + (__v16hi)_mm256_shrdv_epi16(__A, __B, __C), + (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, - (__v8hi) __A, - (__v8hi) __B, - __U); + return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B, + (__v8hi)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S, - (__v8hi) __A, - (__v8hi) __B, - __U); + return (__m128i)__builtin_ia32_selectw_128(__U, + (__v8hi)_mm_shrdv_epi16(__A, __B, __C), + (__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) +_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { - return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, - (__v8hi) __A, - (__v8hi) __B, - (__mmask8) -1); + return (__m128i)__builtin_ia32_selectw_128(__U, + (__v8hi)_mm_shrdv_epi16(__A, __B, __C), + (__v8hi)_mm_setzero_si128()); } |