diff options
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 92 | ||||
-rw-r--r-- | clang/lib/Headers/avx512bwintrin.h | 35 | ||||
-rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 68 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vlbwintrin.h | 70 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vlintrin.h | 136 |
5 files changed, 204 insertions, 197 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b79a9a4a21d..e99a1c302a6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6277,23 +6277,13 @@ BuildVector(ArrayRef<llvm::Value*> Ops) { return Result; } -static Value *EmitX86MaskedStore(CodeGenFunction &CGF, - SmallVectorImpl<Value *> &Ops, - unsigned Align) { - // Cast the pointer to right type. - Ops[0] = CGF.Builder.CreateBitCast(Ops[0], - llvm::PointerType::getUnqual(Ops[1]->getType())); - - // If the mask is all ones just emit a regular store. - if (const auto *C = dyn_cast<Constant>(Ops[2])) - if (C->isAllOnesValue()) - return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); +// Convert the mask from an integer type to a vector of i1. +static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, + unsigned NumElts) { - // Convert the mask from an integer type to a vector of i1. - unsigned NumElts = Ops[1]->getType()->getVectorNumElements(); llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), - cast<IntegerType>(Ops[2]->getType())->getBitWidth()); - Ops[2] = CGF.Builder.CreateBitCast(Ops[2], MaskTy); + cast<IntegerType>(Mask->getType())->getBitWidth()); + Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); // If we have less than 8 elements, then the starting mask was an i8 and // we need to extract down to the right number of elements. @@ -6301,12 +6291,29 @@ static Value *EmitX86MaskedStore(CodeGenFunction &CGF, int Indices[4]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; - Ops[2] = CGF.Builder.CreateShuffleVector(Ops[2], Ops[2], + MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, makeArrayRef(Indices, NumElts), "extract"); } + return MaskVec; +} + +static Value *EmitX86MaskedStore(CodeGenFunction &CGF, + SmallVectorImpl<Value *> &Ops, + unsigned Align) { + // Cast the pointer to right type. + Ops[0] = CGF.Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Ops[1]->getType())); + + // If the mask is all ones just emit a regular store. + if (const auto *C = dyn_cast<Constant>(Ops[2])) + if (C->isAllOnesValue()) + return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); + + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + Ops[1]->getType()->getVectorNumElements()); - return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, Ops[2]); + return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec); } static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, @@ -6320,24 +6327,24 @@ static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, if (C->isAllOnesValue()) return CGF.Builder.CreateAlignedLoad(Ops[0], Align); - // Convert the mask from an integer type to a vector of i1. - unsigned NumElts = Ops[1]->getType()->getVectorNumElements(); - llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), - cast<IntegerType>(Ops[2]->getType())->getBitWidth()); - Ops[2] = CGF.Builder.CreateBitCast(Ops[2], MaskTy); + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + Ops[1]->getType()->getVectorNumElements()); - // If we have less than 8 elements, then the starting mask was an i8 and - // we need to extract down to the right number of elements. - if (NumElts < 8) { - int Indices[4]; - for (unsigned i = 0; i != NumElts; ++i) - Indices[i] = i; - Ops[2] = CGF.Builder.CreateShuffleVector(Ops[2], Ops[2], - makeArrayRef(Indices, NumElts), - "extract"); - } + return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); +} + +static Value *EmitX86Select(CodeGenFunction &CGF, + SmallVectorImpl<Value *> &Ops) { + + // If the mask is all ones just return first argument. + if (const auto *C = dyn_cast<Constant>(Ops[0])) + if (C->isAllOnesValue()) + return Ops[1]; + + Value *MaskVec = getMaskVecValue(CGF, Ops[0], + Ops[1]->getType()->getVectorNumElements()); - return CGF.Builder.CreateMaskedLoad(Ops[0], Align, Ops[2], Ops[1]); + return CGF.Builder.CreateSelect(MaskVec, Ops[1], Ops[2]); } Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, @@ -6787,6 +6794,25 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, SI->setAlignment(Align); return SI; } + case X86::BI__builtin_ia32_selectb_128: + case X86::BI__builtin_ia32_selectb_256: + case X86::BI__builtin_ia32_selectb_512: + case X86::BI__builtin_ia32_selectw_128: + case X86::BI__builtin_ia32_selectw_256: + case X86::BI__builtin_ia32_selectw_512: + case X86::BI__builtin_ia32_selectd_128: + case X86::BI__builtin_ia32_selectd_256: + case X86::BI__builtin_ia32_selectd_512: + case X86::BI__builtin_ia32_selectq_128: + case X86::BI__builtin_ia32_selectq_256: + case X86::BI__builtin_ia32_selectq_512: + case X86::BI__builtin_ia32_selectps_128: + case X86::BI__builtin_ia32_selectps_256: + case X86::BI__builtin_ia32_selectps_512: + case X86::BI__builtin_ia32_selectpd_128: + case X86::BI__builtin_ia32_selectpd_256: + case X86::BI__builtin_ia32_selectpd_512: + return EmitX86Select(*this, Ops); // 3DNow! case X86::BI__builtin_ia32_pswapdsf: case X86::BI__builtin_ia32_pswapdsi: { diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index c7c8f8c3607..2b119a3bd77 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -452,17 +452,17 @@ _mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) { - return (__m512i) __builtin_ia32_blendmb_512_mask ((__v64qi) __A, + return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, (__v64qi) __W, - (__mmask64) __U); + (__v64qi) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) { - return (__m512i) __builtin_ia32_blendmw_512_mask ((__v32hi) __A, + return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, (__v32hi) __W, - (__mmask32) __U); + (__v32hi) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1852,38 +1852,35 @@ _mm512_maskz_srl_epi16 (__mmask32 __U, __m512i __A, __m128i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) { - return (__m512i) __builtin_ia32_movdquhi512_mask ((__v32hi) __A, - (__v32hi) __W, - (__mmask32) __U); + return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, + (__v32hi) __A, + (__v32hi) __W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) { - return (__m512i) __builtin_ia32_movdquhi512_mask ((__v32hi) __A, - (__v32hi) - _mm512_setzero_hi (), - (__mmask32) __U); + return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, + (__v32hi) __A, + (__v32hi) _mm512_setzero_hi ()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) { - return (__m512i) __builtin_ia32_movdquqi512_mask ((__v64qi) __A, - (__v64qi) __W, - (__mmask64) __U); + return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, + (__v64qi) __A, + (__v64qi) __W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) { - return (__m512i) __builtin_ia32_movdquqi512_mask ((__v64qi) __A, - (__v64qi) - _mm512_setzero_hi (), - (__mmask64) __U); + return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, + (__v64qi) __A, + (__v64qi) _mm512_setzero_hi ()); } - static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) { diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 5e2b93972ae..efbc4bb1969 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -3538,33 +3538,33 @@ _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A, static __inline __m512d __DEFAULT_FN_ATTRS _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) { - return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A, + return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, (__v8df) __W, - (__mmask8) __U); + (__v8df) __A); } static __inline __m512 __DEFAULT_FN_ATTRS _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) { - return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A, + return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, (__v16sf) __W, - (__mmask16) __U); + (__v16sf) __A); } static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) { - return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A, + return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, (__v8di) __W, - (__mmask8) __U); + (__v8di) __A); } static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) { - return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A, + return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, (__v16si) __W, - (__mmask16) __U); + (__v16si) __A); } /* Compare */ @@ -5341,35 +5341,33 @@ _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { - return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, - (__v16si) __W, - (__mmask16) __U); + return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, + (__v16si) __A, + (__v16si) __W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) { - return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); + return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, + (__v16si) __A, + (__v16si) _mm512_setzero_si512 ()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { - return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, - (__v8di) __W, - (__mmask8) __U); + return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, + (__v8di) __A, + (__v8di) __W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) { - return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); + return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, + (__v8di) __A, + (__v8di) _mm512_setzero_si512 ()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -9243,35 +9241,33 @@ _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) { - return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, - (__v8df) __W, - (__mmask8) __U); + return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, + (__v8df) __A, + (__v8df) __W); } static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) { - return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U); + return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, + (__v8df) __A, + (__v8df) _mm512_setzero_pd ()); } static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) { - return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, - (__v16sf) __W, - (__mmask16) __U); + return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, + (__v16sf) __A, + (__v16sf) __W); } static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) { - return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U); + return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, + (__v16sf) __A, + (__v16sf) _mm512_setzero_ps ()); } static __inline__ void __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 71dac6b3bb3..43cdadbf4d0 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -786,33 +786,33 @@ _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) { - return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A, - (__v16qi) __W, - (__mmask16) __U); + return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, + (__v16qi) __W, + (__v16qi) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) { - return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A, + return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, (__v32qi) __W, - (__mmask32) __U); + (__v32qi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) { - return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A, + return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __W, - (__mmask8) __U); + (__v8hi) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) { - return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A, + return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, (__v16hi) __W, - (__mmask16) __U); + (__v16hi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -2816,69 +2816,65 @@ _mm256_maskz_srl_epi16 (__mmask16 __U, __m256i __A, __m128i __B) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A, - (__v8hi) __W, - (__mmask8) __U); + return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, + (__v8hi) __A, + (__v8hi) __W); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A, - (__v8hi) - _mm_setzero_hi (), - (__mmask8) __U); + return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, + (__v8hi) __A, + (__v8hi) _mm_setzero_hi ()); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A) { - return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A, - (__v16hi) __W, - (__mmask16) __U); + return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, + (__v16hi) __A, + (__v16hi) __W); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A) { - return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A, - (__v16hi) - _mm256_setzero_si256 (), - (__mmask16) __U); + return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, + (__v16hi) __A, + (__v16hi) _mm256_setzero_si256 ()); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A) { - return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A, - (__v16qi) __W, - (__mmask16) __U); + return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, + (__v16qi) __A, + (__v16qi) __W); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) { - return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A, - (__v16qi) - _mm_setzero_hi (), - (__mmask16) __U); + return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, + (__v16qi) __A, + (__v16qi) _mm_setzero_hi ()); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A) { - return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A, - (__v32qi) __W, - (__mmask32) __U); + return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, + (__v32qi) __A, + (__v32qi) __W); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A) { - return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A, - (__v32qi) - _mm256_setzero_si256 (), - (__mmask32) __U); + return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, + (__v32qi) __A, + (__v32qi) _mm256_setzero_si256 ()); } diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 42ab5949df9..62e5e863aea 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -2049,58 +2049,58 @@ _mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { - return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A, + return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, (__v4si) __W, - (__mmask8) __U); + (__v4si) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { - return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A, + return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, (__v8si) __W, - (__mmask8) __U); + (__v8si) __A); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { - return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A, + return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, (__v2df) __W, - (__mmask8) __U); + (__v2df) __A); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { - return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A, + return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, (__v4df) __W, - (__mmask8) __U); + (__v4df) __A); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { - return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A, + return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, (__v4sf) __W, - (__mmask8) __U); + (__v4sf) __A); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { - return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A, + return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, (__v8sf) __W, - (__mmask8) __U); + (__v8sf) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { - return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A, + return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, (__v2di) __W, - (__mmask8) __U); + (__v2di) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { - return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A, + return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, (__v4di) __W, - (__mmask8) __U); + (__v4di) __A); } static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -5837,36 +5837,34 @@ _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A, - (__v4si) __W, - (__mmask8) __U); + return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, + (__v4si) __A, + (__v4si) __W); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, + (__v4si) __A, + (__v4si) _mm_setzero_si128 ()); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A, - (__v8si) __W, - (__mmask8) __U); + return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, + (__v8si) __A, + (__v8si) __W); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) { - return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, + (__v8si) __A, + (__v8si) _mm256_setzero_si256 ()); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -5926,35 +5924,33 @@ _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A, - (__v2di) __W, - (__mmask8) __U); + return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, + (__v2di) __A, + (__v2di) __W); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A, - (__v2di) - _mm_setzero_di (), - (__mmask8) __U); + return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, + (__v2di) __A, + (__v2di) _mm_setzero_di ()); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A, - (__v4di) __W, - (__mmask8) __U); + return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, + (__v4di) __A, + (__v4di) __W); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) { - return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, + (__v4di) __A, + (__v4di) _mm256_setzero_si256 ()); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -9284,69 +9280,65 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) { - return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A, - (__v2df) __W, - (__mmask8) __U); + return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, + (__v2df) __A, + (__v2df) __W); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) { - return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A, - (__v2df) - _mm_setzero_pd (), - (__mmask8) __U); + return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, + (__v2df) __A, + (__v2df) _mm_setzero_pd ()); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) { - return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A, - (__v4df) __W, - (__mmask8) __U); + return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, + (__v4df) __A, + (__v4df) __W); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) { - return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) __U); + return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, + (__v4df) __A, + (__v4df) _mm256_setzero_pd ()); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) { - return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A, - (__v4sf) __W, - (__mmask8) __U); + return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, + (__v4sf) __A, + (__v4sf) __W); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) { - return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) __U); + return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, + (__v4sf) __A, + (__v4sf) _mm_setzero_ps ()); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) { - return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A, - (__v8sf) __W, - (__mmask8) __U); + return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, + (__v8sf) __A, + (__v8sf) __W); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) { - return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U); + return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, + (__v8sf) __A, + (__v8sf) _mm256_setzero_ps ()); } static __inline__ __m128 __DEFAULT_FN_ATTRS |