summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp94
-rw-r--r--clang/lib/Headers/avx512fintrin.h1239
-rw-r--r--clang/lib/Headers/avx512vlintrin.h656
-rw-r--r--clang/lib/Sema/SemaChecking.cpp26
4 files changed, 1123 insertions, 892 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d4b11659667..36c811f473b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8427,6 +8427,84 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
return Res;
}
+// Lowers X86 FMA intrinsics to IR.
+static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
+ unsigned BuiltinID) {
+
+ bool IsAddSub = false;
+ bool IsScalar = false;
+
+ // 4 operands always means rounding mode without a mask here.
+ bool IsRound = Ops.size() == 4;
+
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: break;
+ case clang::X86::BI__builtin_ia32_vfmaddss3: IsScalar = true; break;
+ case clang::X86::BI__builtin_ia32_vfmaddsd3: IsScalar = true; break;
+ case clang::X86::BI__builtin_ia32_vfmaddps512:
+ ID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
+ case clang::X86::BI__builtin_ia32_vfmaddpd512:
+ ID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
+ case clang::X86::BI__builtin_ia32_vfmaddsubps: IsAddSub = true; break;
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd: IsAddSub = true; break;
+ case clang::X86::BI__builtin_ia32_vfmaddsubps256: IsAddSub = true; break;
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd256: IsAddSub = true; break;
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512: {
+ ID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
+ IsAddSub = true;
+ break;
+ }
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512: {
+ ID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
+ IsAddSub = true;
+ break;
+ }
+ }
+
+ // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
+ if (IsRound) {
+ Function *Intr = CGF.CGM.getIntrinsic(ID);
+ if (cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != (uint64_t)4)
+ return CGF.Builder.CreateCall(Intr, Ops);
+ }
+
+ Value *A = Ops[0];
+ Value *B = Ops[1];
+ Value *C = Ops[2];
+
+ if (IsScalar) {
+ A = CGF.Builder.CreateExtractElement(A, (uint64_t)0);
+ B = CGF.Builder.CreateExtractElement(B, (uint64_t)0);
+ C = CGF.Builder.CreateExtractElement(C, (uint64_t)0);
+ }
+
+ llvm::Type *Ty = A->getType();
+ Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Value *Res = CGF.Builder.CreateCall(FMA, {A, B, C} );
+
+ if (IsScalar)
+ return CGF.Builder.CreateInsertElement(Ops[0], Res, (uint64_t)0);
+
+ if (IsAddSub) {
+ // Negate even elts in C using a mask.
+ unsigned NumElts = Ty->getVectorNumElements();
+ SmallVector<Constant *, 16> NMask;
+ Constant *Zero = ConstantInt::get(CGF.Builder.getInt1Ty(), 0);
+ Constant *One = ConstantInt::get(CGF.Builder.getInt1Ty(), 1);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ NMask.push_back(i % 2 == 0 ? One : Zero);
+ }
+ Value *NegMask = ConstantVector::get(NMask);
+
+ Value *NegC = CGF.Builder.CreateFNeg(C);
+ Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
+ Res = CGF.Builder.CreateSelect(NegMask, FMSub, Res);
+ }
+
+ return Res;
+}
+
static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
ArrayRef<Value *> Ops) {
llvm::Type *Ty = Ops[0]->getType();
@@ -8820,6 +8898,22 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cvtq2mask512:
return EmitX86ConvertToMask(*this, Ops[0]);
+ case X86::BI__builtin_ia32_vfmaddss3:
+ case X86::BI__builtin_ia32_vfmaddsd3:
+ case X86::BI__builtin_ia32_vfmaddps:
+ case X86::BI__builtin_ia32_vfmaddpd:
+ case X86::BI__builtin_ia32_vfmaddps256:
+ case X86::BI__builtin_ia32_vfmaddpd256:
+ case X86::BI__builtin_ia32_vfmaddps512:
+ case X86::BI__builtin_ia32_vfmaddpd512:
+ case X86::BI__builtin_ia32_vfmaddsubps:
+ case X86::BI__builtin_ia32_vfmaddsubpd:
+ case X86::BI__builtin_ia32_vfmaddsubps256:
+ case X86::BI__builtin_ia32_vfmaddsubpd256:
+ case X86::BI__builtin_ia32_vfmaddsubps512:
+ case X86::BI__builtin_ia32_vfmaddsubpd512:
+ return EmitX86FMAExpr(*this, Ops, BuiltinID);
+
case X86::BI__builtin_ia32_movdqa32store128_mask:
case X86::BI__builtin_ia32_movdqa64store128_mask:
case X86::BI__builtin_ia32_storeaps128_mask:
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index fc030d87a07..5bfe39ec792 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -2577,819 +2577,910 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__mmask8)-1, (int)(R)); })
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), (__mmask8)-1, \
- (int)(R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), (int)(R)); })
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(A)); })
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(C)); })
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)_mm512_setzero_pd()); })
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)); })
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(A)); })
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)_mm512_setzero_pd()); })
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), (__mmask8)-1, \
- (int)(R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), (int)(R)); })
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(C)); })
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)_mm512_setzero_pd()); })
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)); })
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)_mm512_setzero_pd()); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __A);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __C);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) _mm512_setzero_pd());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __A);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) _mm512_setzero_pd());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __C);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) _mm512_setzero_pd());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) _mm512_setzero_pd());
}
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), (__mmask16)-1, \
- (int)(R)); })
+ (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), (int)(R)); })
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(A)); })
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(C)); })
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)_mm512_setzero_ps()); })
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R)); })
+ (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)); })
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(A)); })
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)_mm512_setzero_ps()); })
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), (__mmask16)-1, \
- (int)(R)); })
+ (__m512)__builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), (int)(R)); })
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(C)); })
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)_mm512_setzero_ps()); })
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R)); })
+ (__m512)__builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)); })
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)_mm512_setzero_ps()); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION), \
+ (__v16sf) __A);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION), \
+ (__v16sf) __C);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) _mm512_setzero_ps());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __A);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) _mm512_setzero_ps());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __C);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) _mm512_setzero_ps());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) _mm512_setzero_ps());
}
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R)); })
+ (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)); })
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(A)); })
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(C)); })
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)_mm512_setzero_pd()); })
#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R)); })
+ (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)); })
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(A)); })
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)_mm512_setzero_pd()); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __A);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __C);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) _mm512_setzero_pd());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __A);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) _mm512_setzero_pd());
}
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R)); })
+ (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)); })
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(A)); })
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(C)); })
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)_mm512_setzero_ps()); })
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R)); })
+ (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)); })
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(A)); })
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)_mm512_setzero_ps()); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+                                                   _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __A);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+                                                   _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __C);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) _mm512_setzero_ps());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __A);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) _mm512_setzero_ps());
}
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(C)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ (__m512d)__builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __C);
}
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
-
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(C)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ (__m512)__builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __C);
}
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(C)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ (__m512d)__builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __C);
}
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(C)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ (__m512)__builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __C);
}
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(A)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __A);
}
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(A)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __A);
}
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ -(__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(A)); })
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
+ -(__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (int)(R)), \
+ (__v8df)(__m512d)(C)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ -(__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __A);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
+ (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
+ -(__v8df) __B,
+ -(__v8df) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v8df) __C);
}
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__m512)__builtin_ia32_vfmaddps512 ((__v16sf)(__m512)(A), \
+ -(__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(A)); })
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__m512)__builtin_ia32_vfmaddps512 ((__v16sf)(__m512)(A), \
+ -(__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (int)(R)), \
+ (__v16sf)(__m512)(C)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ -(__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __A);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
+ (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
+ -(__v16sf) __B,
+ -(__v16sf) __C,
+ _MM_FROUND_CUR_DIRECTION),
+ (__v16sf) __C);
}
@@ -8112,27 +8203,27 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- (__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
- (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
- (__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), (__mmask8)(U), \
- (int)(R)); })
+  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
+                                        (__v4sf)(__m128)(A), \
+                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
+                                        (int)(R)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
@@ -8144,11 +8235,11 @@ _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
- (__v4sf) __X,
- (__v4sf) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ (__v4sf) __X,
+ (__v4sf) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\
@@ -8160,27 +8251,27 @@ _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- (__v4sf) __A,
- -(__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ (__v4sf) __A,
+ -(__v4sf) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
(__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
(__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), (__mmask8)(U), \
+ -(__v4sf)(__m128)(B), (__mmask8)(U), \
(int)(R)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
- (__v4sf) __B,
- -(__v4sf) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\
@@ -8192,11 +8283,11 @@ _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
- (__v4sf) __X,
- (__v4sf) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ (__v4sf) __X,
+ -(__v4sf) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
@@ -8208,11 +8299,11 @@ _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- -(__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\
@@ -8224,43 +8315,43 @@ _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
+ -(__v4sf) __B,
+ (__v4sf) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\
- (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
+ (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), \
(__v4sf)(__m128)(C), (__mmask8)(U), \
(int)(R)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
- (__v4sf) __X,
- (__v4sf) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ -(__v4sf) __X,
+ (__v4sf) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\
- (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
- (__v4sf)(__m128)(X), \
+ (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
+ -(__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), (__mmask8)(U), \
(int)(R)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- -(__v4sf) __A,
- -(__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\
@@ -8272,43 +8363,43 @@ _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
- (__v4sf) __B,
- -(__v4sf) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
+ -(__v4sf) __B,
+ -(__v4sf) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
- (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
+ (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), \
-(__v4sf)(__m128)(C), (__mmask8)(U), \
_MM_FROUND_CUR_DIRECTION); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
- (__v4sf) __X,
- (__v4sf) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
+ -(__v4sf) __X,
+ -(__v4sf) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
- (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \
- (__v4sf)(__m128)(X), \
+ (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
+ -(__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), (__mmask8)(U), \
(int)(R)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
- (__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\
@@ -8320,11 +8411,11 @@ _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
@@ -8336,11 +8427,11 @@ _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
- (__v2df) __X,
- (__v2df) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ (__v2df) __X,
+ (__v2df) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\
@@ -8352,11 +8443,11 @@ _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
- (__v2df) __A,
- -(__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ (__v2df) __A,
+ -(__v2df) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\
@@ -8368,11 +8459,11 @@ _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
- (__v2df) __B,
- -(__v2df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\
@@ -8384,11 +8475,11 @@ _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
- (__v2df) __X,
- (__v2df) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ (__v2df) __X,
+ -(__v2df) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
@@ -8400,11 +8491,11 @@ _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
- -(__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\
@@ -8416,43 +8507,43 @@ _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
+ -(__v2df) __B,
+ (__v2df) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\
- (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
+ (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), \
(__v2df)(__m128d)(C), (__mmask8)(U), \
(int)(R)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
- (__v2df) __X,
- (__v2df) __Y,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ -(__v2df) __X,
+ (__v2df) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\
- (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
- (__v2df)(__m128d)(X), \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
+ -(__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), (__mmask8)(U), \
(int)(R)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
- -(__v2df) __A,
- -(__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B);
+ __W[0] = (__U & 1) ? __Z[0] : __W[0];
+ return __W;
}
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\
@@ -8464,16 +8555,16 @@ _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
- (__v2df) __B,
- -(__v2df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
+ -(__v2df) __B,
+ -(__v2df) __C);
+ __A[0] = (__U & 1) ? __Z[0] : 0;
+ return __A;
}
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
- (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
+ (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), \
-(__v2df)(__m128d)(C), \
(__mmask8)(U), \
_MM_FROUND_CUR_DIRECTION); })
@@ -8481,16 +8572,16 @@ _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
- (__v2df) __X,
- (__v2df) (__Y),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
+ -(__v2df) __X,
+ -(__v2df) __Y);
+ __Y[0] = (__U & 1) ? __Z[0] : __Y[0];
+ return __Y;
}
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
- (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \
- (__v2df)(__m128d)(X), \
+ (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
+ -(__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), \
(__mmask8)(U), (int)(R)); })
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 833e70a9471..9a2bf34d844 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -813,658 +813,722 @@ _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddpd ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C),
+ (__v2df) __A);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddpd ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C),
+ (__v2df) __C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddpd ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C),
+ (__v2df)_mm_setzero_pd());
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
- (__v2df) __B,
- -(__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddpd ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C),
+ (__v2df) __A);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
- (__v2df) __B,
- -(__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddpd ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C),
+ (__v2df)_mm_setzero_pd());
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddpd (-(__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C),
+ (__v2df) __C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddpd (-(__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C),
+ (__v2df)_mm_setzero_pd());
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
- (__v2df) __B,
- -(__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddpd (-(__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C),
+ (__v2df)_mm_setzero_pd());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
- return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddpd256 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C),
+ (__v4df) __A);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
- return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddpd256 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C),
+ (__v4df) __C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddpd256 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C),
+ (__v4df)_mm256_setzero_pd());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
- return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
- (__v4df) __B,
- -(__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddpd256 ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C),
+ (__v4df) __A);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
- (__v4df) __B,
- -(__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddpd256 ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C),
+ (__v4df)_mm256_setzero_pd());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
- return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
- (__v4df) __B,
- (__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C),
+ (__v4df) __C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
- (__v4df) __B,
- (__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C),
+ (__v4df)_mm256_setzero_pd());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
- (__v4df) __B,
- -(__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C),
+ (__v4df)_mm256_setzero_pd());
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddps ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C),
+ (__v4sf) __A);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddps ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C),
+ (__v4sf) __C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddps ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C),
+ (__v4sf)_mm_setzero_ps());
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- -(__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddps ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C),
+ (__v4sf) __A);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
- (__v4sf) __B,
- -(__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddps ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C),
+ (__v4sf)_mm_setzero_ps());
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddps (-(__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C),
+ (__v4sf) __C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddps (-(__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C),
+ (__v4sf)_mm_setzero_ps());
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
- (__v4sf) __B,
- -(__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddps (-(__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C),
+ (__v4sf)_mm_setzero_ps());
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
- return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C),
+ (__v8sf) __A);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
- return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C),
+ (__v8sf) __C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
- return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C),
+ (__v8sf)_mm256_setzero_ps());
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
- return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- -(__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C),
+ (__v8sf) __A);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
- return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
- (__v8sf) __B,
- -(__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C),
+ (__v8sf)_mm256_setzero_ps());
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
- return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C),
+ (__v8sf) __C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
- return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C),
+ (__v8sf)_mm256_setzero_ps());
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
- return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
- (__v8sf) __B,
- -(__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C),
+ (__v8sf)_mm256_setzero_ps());
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C),
+ (__v2df) __A);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8)
- __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C),
+ (__v2df) __C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8)
- __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C),
+ (__v2df)_mm_setzero_pd());
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
- (__v2df) __B,
- -(__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C),
+ (__v2df) __A);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
- (__v2df) __B,
- -(__v2df) __C,
- (__mmask8)
- __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C),
+ (__v2df)_mm_setzero_pd());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
- return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C),
+ (__v4df) __A);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
- return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __C,
- (__mmask8)
- __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C),
+ (__v4df) __C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __C,
- (__mmask8)
- __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C),
+ (__v4df)_mm256_setzero_pd());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
- return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
- (__v4df) __B,
- -(__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C),
+ (__v4df) __A);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
- (__v4df) __B,
- -(__v4df) __C,
- (__mmask8)
- __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C),
+ (__v4df)_mm256_setzero_pd());
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddsubps ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C),
+ (__v4sf) __A);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddsubps ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C),
+ (__v4sf) __C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddsubps ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C),
+ (__v4sf)_mm_setzero_ps());
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- -(__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddsubps ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C),
+ (__v4sf) __A);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
- (__v4sf) __B,
- -(__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddsubps ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C),
+ (__v4sf)_mm_setzero_ps());
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
__m256 __C)
{
- return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C),
+ (__v8sf) __A);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
- return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C),
+ (__v8sf) __C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
- return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C),
+ (__v8sf)_mm256_setzero_ps());
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
- return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- -(__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C),
+ (__v8sf) __A);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
- return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
- (__v8sf) __B,
- -(__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C),
+ (__v8sf)_mm256_setzero_ps());
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddpd ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C),
+ (__v2df) __C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
- return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddpd256 ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C),
+ (__v4df) __C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddps ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C),
+ (__v4sf) __C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
- return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C),
+ (__v8sf) __C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8)
- __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C),
+ (__v2df) __C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
- return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __C,
- (__mmask8)
- __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C),
+ (__v4df) __C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddsubps ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C),
+ (__v4sf) __C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
- return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C),
+ (__v8sf) __C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddpd ((__v2df) __A,
+ -(__v2df) __B,
+ (__v2df) __C),
+ (__v2df) __A);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
- return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddpd256 ((__v4df) __A,
+ -(__v4df) __B,
+ (__v4df) __C),
+ (__v4df) __A);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddps ((__v4sf) __A,
+ -(__v4sf) __B,
+ (__v4sf) __C),
+ (__v4sf) __A);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
- return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddps256 ((__v8sf) __A,
+ -(__v8sf) __B,
+ (__v8sf) __C),
+ (__v8sf) __A);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddpd ((__v2df) __A,
+ -(__v2df) __B,
+ -(__v2df) __C),
+ (__v2df) __A);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
- return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
+ __builtin_ia32_vfmaddpd ((__v2df) __A,
+ -(__v2df) __B,
+ -(__v2df) __C),
+ (__v2df) __C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
- return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddpd256 ((__v4df) __A,
+ -(__v4df) __B,
+ -(__v4df) __C),
+ (__v4df) __A);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
- return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
+ __builtin_ia32_vfmaddpd256 ((__v4df) __A,
+ -(__v4df) __B,
+ -(__v4df) __C),
+ (__v4df) __C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddps ((__v4sf) __A,
+ -(__v4sf) __B,
+ -(__v4sf) __C),
+ (__v4sf) __A);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
- return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
+ __builtin_ia32_vfmaddps ((__v4sf) __A,
+ -(__v4sf) __B,
+ -(__v4sf) __C),
+ (__v4sf) __C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
- return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddps256 ((__v8sf) __A,
+ -(__v8sf) __B,
+ -(__v8sf) __C),
+ (__v8sf) __A);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
- return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
+ __builtin_ia32_vfmaddps256 ((__v8sf) __A,
+ -(__v8sf) __B,
+ -(__v8sf) __C),
+ (__v8sf) __C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 2f15c514dc1..7b9975e6abf 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2340,6 +2340,10 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
case X86::BI__builtin_ia32_sqrtpd512_mask:
case X86::BI__builtin_ia32_sqrtps512_mask:
+ case X86::BI__builtin_ia32_vfmaddpd512:
+ case X86::BI__builtin_ia32_vfmaddps512:
+ case X86::BI__builtin_ia32_vfmaddsubpd512:
+ case X86::BI__builtin_ia32_vfmaddsubps512:
ArgNum = 3;
HasRC = true;
break;
@@ -2368,28 +2372,6 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
case X86::BI__builtin_ia32_sqrtsd_round_mask:
case X86::BI__builtin_ia32_sqrtss_round_mask:
- case X86::BI__builtin_ia32_vfmaddpd512_mask:
- case X86::BI__builtin_ia32_vfmaddpd512_mask3:
- case X86::BI__builtin_ia32_vfmaddpd512_maskz:
- case X86::BI__builtin_ia32_vfmaddps512_mask:
- case X86::BI__builtin_ia32_vfmaddps512_mask3:
- case X86::BI__builtin_ia32_vfmaddps512_maskz:
- case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
- case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
- case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
- case X86::BI__builtin_ia32_vfmaddsubps512_mask:
- case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
- case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
- case X86::BI__builtin_ia32_vfmsubpd512_mask3:
- case X86::BI__builtin_ia32_vfmsubps512_mask3:
- case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
- case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
- case X86::BI__builtin_ia32_vfnmaddpd512_mask:
- case X86::BI__builtin_ia32_vfnmaddps512_mask:
- case X86::BI__builtin_ia32_vfnmsubpd512_mask:
- case X86::BI__builtin_ia32_vfnmsubpd512_mask3:
- case X86::BI__builtin_ia32_vfnmsubps512_mask:
- case X86::BI__builtin_ia32_vfnmsubps512_mask3:
case X86::BI__builtin_ia32_vfmaddsd3_mask:
case X86::BI__builtin_ia32_vfmaddsd3_maskz:
case X86::BI__builtin_ia32_vfmaddsd3_mask3:
OpenPOWER on IntegriCloud