-rw-r--r--  clang/include/clang/Basic/BuiltinsX86.def |  20
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp           | 173
-rw-r--r--  clang/lib/Headers/avx512fintrin.h         | 956
-rw-r--r--  clang/lib/Sema/SemaChecking.cpp           |  20
-rw-r--r--  clang/test/CodeGen/avx512f-builtins.c     |  48
5 files changed, 600 insertions(+), 617 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index a164681b2e6..33dda55fe39 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -731,10 +731,22 @@ TARGET_BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "nc", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "nc", "fma|fma4")
TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "nc", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd512, "V8dV8dV8dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps512, "V16fV16fV16fV16fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512, "V8dV8dV8dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512, "V16fV16fV16fV16fIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_maskz, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask3, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmsubpd512_mask3, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps512_maskz, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask3, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmsubps512_mask3, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_maskz, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask3, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd512_mask3, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_maskz, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask3, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddps512_mask3, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
// XOP
TARGET_BUILTIN(__builtin_ia32_vpmacssww, "V8sV8sV8sV8s", "nc", "xop")
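The new prototype strings follow the usual Builtins.def encoding: "V8dV8dV8dV8dUcIi" declares a builtin returning V8d (a vector of 8 doubles) that takes three V8d vectors, a Uc (unsigned char, the 8-bit write mask), and Ii (an int that must be an integer constant expression, the rounding immediate); the ps variants use Us (unsigned short) for the 16-bit mask. As a sketch only (ordinary code should use the <immintrin.h> wrappers changed below), a direct call to one of the new builtins on an AVX-512F target looks like:

/* Sketch, not part of the patch; compile with -mavx512f.
 * The trailing 4 is _MM_FROUND_CUR_DIRECTION. */
#include <immintrin.h>

__m512d fmadd_demo(__m512d a, __m512d b, __m512d c, __mmask8 m) {
  return (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)a, (__v8df)b,
                                                  (__v8df)c, m, 4);
}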
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a09fa7aae5e..a086b4d926c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8555,79 +8555,110 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
// Lowers X86 FMA intrinsics to IR.
static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
- unsigned BuiltinID) {
+ unsigned BuiltinID, bool IsAddSub) {
- bool IsAddSub = false;
- bool IsScalar = false;
-
- // 4 operands always means rounding mode without a mask here.
- bool IsRound = Ops.size() == 4;
-
- Intrinsic::ID ID;
+ bool Subtract = false;
+ Intrinsic::ID IID = Intrinsic::not_intrinsic;
switch (BuiltinID) {
default: break;
- case clang::X86::BI__builtin_ia32_vfmaddss3: IsScalar = true; break;
- case clang::X86::BI__builtin_ia32_vfmaddsd3: IsScalar = true; break;
- case clang::X86::BI__builtin_ia32_vfmaddps512:
- ID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
- case clang::X86::BI__builtin_ia32_vfmaddpd512:
- ID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
- case clang::X86::BI__builtin_ia32_vfmaddsubps: IsAddSub = true; break;
- case clang::X86::BI__builtin_ia32_vfmaddsubpd: IsAddSub = true; break;
- case clang::X86::BI__builtin_ia32_vfmaddsubps256: IsAddSub = true; break;
- case clang::X86::BI__builtin_ia32_vfmaddsubpd256: IsAddSub = true; break;
- case clang::X86::BI__builtin_ia32_vfmaddsubps512: {
- ID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
- IsAddSub = true;
+ case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
+ IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
+ case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
+ case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+ IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
break;
- }
- case clang::X86::BI__builtin_ia32_vfmaddsubpd512: {
- ID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
- IsAddSub = true;
+ case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
break;
}
- }
-
- // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
- if (IsRound) {
- Function *Intr = CGF.CGM.getIntrinsic(ID);
- if (cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != (uint64_t)4)
- return CGF.Builder.CreateCall(Intr, Ops);
- }
Value *A = Ops[0];
Value *B = Ops[1];
Value *C = Ops[2];
- if (IsScalar) {
- A = CGF.Builder.CreateExtractElement(A, (uint64_t)0);
- B = CGF.Builder.CreateExtractElement(B, (uint64_t)0);
- C = CGF.Builder.CreateExtractElement(C, (uint64_t)0);
- }
+ if (Subtract)
+ C = CGF.Builder.CreateFNeg(C);
- llvm::Type *Ty = A->getType();
- Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
- Value *Res = CGF.Builder.CreateCall(FMA, {A, B, C} );
+ Value *Res;
- if (IsScalar)
- return CGF.Builder.CreateInsertElement(Ops[0], Res, (uint64_t)0);
+ // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
+ if (IID != Intrinsic::not_intrinsic &&
+ cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) {
+ Function *Intr = CGF.CGM.getIntrinsic(IID);
+ Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
+ } else {
+ llvm::Type *Ty = A->getType();
+ Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Res = CGF.Builder.CreateCall(FMA, {A, B, C} );
+
+ if (IsAddSub) {
+ // Negate even elts in C using a mask.
+ unsigned NumElts = Ty->getVectorNumElements();
+ SmallVector<Constant *, 16> NMask;
+ Constant *Zero = ConstantInt::get(CGF.Builder.getInt1Ty(), 0);
+ Constant *One = ConstantInt::get(CGF.Builder.getInt1Ty(), 1);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ NMask.push_back(i % 2 == 0 ? One : Zero);
+ }
+ Value *NegMask = ConstantVector::get(NMask);
- if (IsAddSub) {
- // Negate even elts in C using a mask.
- unsigned NumElts = Ty->getVectorNumElements();
- SmallVector<Constant *, 16> NMask;
- Constant *Zero = ConstantInt::get(CGF.Builder.getInt1Ty(), 0);
- Constant *One = ConstantInt::get(CGF.Builder.getInt1Ty(), 1);
- for (unsigned i = 0; i < NumElts; ++i) {
- NMask.push_back(i % 2 == 0 ? One : Zero);
+ Value *NegC = CGF.Builder.CreateFNeg(C);
+ Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
+ Res = CGF.Builder.CreateSelect(NegMask, FMSub, Res);
}
- Value *NegMask = ConstantVector::get(NMask);
+ }
- Value *NegC = CGF.Builder.CreateFNeg(C);
- Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
- Res = CGF.Builder.CreateSelect(NegMask, FMSub, Res);
+ // Handle any required masking.
+ Value *MaskFalseVal = nullptr;
+ switch (BuiltinID) {
+ case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ MaskFalseVal = Ops[0];
+ break;
+ case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
+ break;
+ case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ MaskFalseVal = Ops[2];
+ break;
}
+ if (MaskFalseVal)
+ return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
+
return Res;
}
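The effect of the rewritten helper, as a sketch: when the rounding immediate is _MM_FROUND_CUR_DIRECTION (4), the builtin lowers to the generic llvm.fma intrinsic (plus the even-lane select for the addsub forms), and the mask select is layered on top; any other immediate keeps the target-specific intrinsic so the rounding operand survives. The function names below are hypothetical; both calls end in a select against the accumulator, but only the second reaches @llvm.x86.avx512.vfmadd.pd.512:

#include <immintrin.h>

__m512d lower_to_fma(__m512d a, __mmask8 u, __m512d b, __m512d c) {
  return _mm512_mask_fmadd_pd(a, u, b, c);   /* @llvm.fma.v8f64 + select */
}

__m512d lower_to_target(__m512d a, __mmask8 u, __m512d b, __m512d c) {
  return _mm512_mask_fmadd_round_pd(a, u, b, c,
      _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); /* x86 intrinsic + select */
}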
@@ -9046,20 +9077,40 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86ConvertToMask(*this, Ops[0]);
case X86::BI__builtin_ia32_vfmaddss3:
- case X86::BI__builtin_ia32_vfmaddsd3:
+ case X86::BI__builtin_ia32_vfmaddsd3: {
+ Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
+ Value *B = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
+ Value *C = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
+ Function *FMA = CGM.getIntrinsic(Intrinsic::fma, A->getType());
+ Value *Res = Builder.CreateCall(FMA, {A, B, C} );
+ return Builder.CreateInsertElement(Ops[0], Res, (uint64_t)0);
+ }
case X86::BI__builtin_ia32_vfmaddps:
case X86::BI__builtin_ia32_vfmaddpd:
case X86::BI__builtin_ia32_vfmaddps256:
case X86::BI__builtin_ia32_vfmaddpd256:
- case X86::BI__builtin_ia32_vfmaddps512:
- case X86::BI__builtin_ia32_vfmaddpd512:
+ case X86::BI__builtin_ia32_vfmaddps512_mask:
+ case X86::BI__builtin_ia32_vfmaddps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
case X86::BI__builtin_ia32_vfmaddsubps:
case X86::BI__builtin_ia32_vfmaddsubpd:
case X86::BI__builtin_ia32_vfmaddsubps256:
case X86::BI__builtin_ia32_vfmaddsubpd256:
- case X86::BI__builtin_ia32_vfmaddsubps512:
- case X86::BI__builtin_ia32_vfmaddsubpd512:
- return EmitX86FMAExpr(*this, Ops, BuiltinID);
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+ return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);
case X86::BI__builtin_ia32_movdqa32store128_mask:
case X86::BI__builtin_ia32_movdqa64store128_mask:
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 2b3633e377b..47a0c9d433f 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -2578,910 +2578,818 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__mmask8)-1, (int)(R))
#define _mm512_fmadd_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), (int)(R))
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R))
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)(__m512d)(A))
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)(__m512d)(C))
+ (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)_mm512_setzero_pd())
+ (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
#define _mm512_fmsub_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (int)(R))
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R))
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)(__m512d)(A))
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)_mm512_setzero_pd())
+ (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
#define _mm512_fnmadd_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), (int)(R))
+ (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R))
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)(__m512d)(C))
+ (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)_mm512_setzero_pd())
+ (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
#define _mm512_fnmsub_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (int)(R))
+ (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R))
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)_mm512_setzero_pd())
+ (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddpd512 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) __A);
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddpd512 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) __C);
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddpd512 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) _mm512_setzero_pd());
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddpd512 ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) __A);
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddpd512 ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) _mm512_setzero_pd());
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ -(__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) __C);
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) _mm512_setzero_pd());
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ -(__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) _mm512_setzero_pd());
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_fmadd_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), (int)(R))
+ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R))
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)(__m512)(A))
+ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)(__m512)(C))
+ (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)_mm512_setzero_ps())
+ (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
#define _mm512_fmsub_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (int)(R))
+ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R))
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)(__m512)(A))
+ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)_mm512_setzero_ps())
+ (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
#define _mm512_fnmadd_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), (int)(R))
+ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ -(__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R))
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)(__m512)(C))
+ (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)_mm512_setzero_ps())
+ (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
#define _mm512_fnmsub_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (int)(R))
+ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ -(__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R))
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)_mm512_setzero_ps())
+ (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddps512 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) __A);
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddps512 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) __C);
+ return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddps512 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) _mm512_setzero_ps());
+ return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddps512 ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) __A);
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddps512 ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) _mm512_setzero_ps());
+ return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ -(__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) __C);
+ return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) _mm512_setzero_ps());
+ return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ -(__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) _mm512_setzero_ps());
+ return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (int)(R))
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R))
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)(__m512d)(A))
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)(__m512d)(C))
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)_mm512_setzero_pd())
+ (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (int)(R))
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R))
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)(__m512d)(A))
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)_mm512_setzero_pd())
+ (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) __A);
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) __C);
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) _mm512_setzero_pd());
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) __A);
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) _mm512_setzero_pd());
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (int)(R))
+ (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R))
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)(__m512)(A))
+ (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)(__m512)(C))
+ (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)_mm512_setzero_ps())
+ (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (int)(R))
+ (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R))
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)(__m512)(A))
+ (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)_mm512_setzero_ps())
+ (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) __A);
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) __C);
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) _mm512_setzero_ps());
+ return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) __A);
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) _mm512_setzero_ps());
+ return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)(__m512d)(C))
+ (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- (__m512d)__builtin_ia32_vfmaddpd512 ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) __C);
+ return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)(__m512)(C))
+ (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- (__m512)__builtin_ia32_vfmaddps512 ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) __C);
+ return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)(__m512d)(C))
+ (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- (__m512d)__builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) __C);
+ return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)(__m512)(C))
+ (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- (__m512)__builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) __C);
+ return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)(__m512d)(A))
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ -(__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
- __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) __A);
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ -(__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)(__m512)(A))
+ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ -(__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) __A);
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ -(__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
- -(__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)(__m512d)(A))
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ -(__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
- -(__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (int)(R)), \
- (__v8df)(__m512d)(C))
+ (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R))
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask16) __U,
- (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
- -(__v8df) __B,
- -(__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) __A);
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ -(__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_selectpd_512((__mmask16) __U,
- (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
- -(__v8df) __B,
- -(__v8df) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v8df) __C);
+ return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- (__m512)__builtin_ia32_vfmaddps512 ((__v16sf)(__m512)(A), \
- -(__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)(__m512)(A))
+ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ -(__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- (__m512)__builtin_ia32_vfmaddps512 ((__v16sf)(__m512)(A), \
- -(__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (int)(R)), \
- (__v16sf)(__m512)(C))
+ (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R))
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
- -(__v16sf) __B,
- -(__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) __A);
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ -(__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
- (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
- -(__v16sf) __B,
- -(__v16sf) __C,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf) __C);
+ return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
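Taken together, the header changes express the three masking conventions by choice of builtin rather than by an explicit selectpd/selectps wrapper: _mask keeps the first operand in lanes where the mask bit is clear, _mask3 keeps the third, and _maskz zeroes them. A minimal sketch of the difference, using the public intrinsics:

#include <immintrin.h>

__m512d mask_demo(__m512d a, __m512d b, __m512d c, __mmask8 u) {
  __m512d keep_a = _mm512_mask_fmadd_pd(a, u, b, c);  /* a   where !u */
  __m512d keep_c = _mm512_mask3_fmadd_pd(a, b, c, u); /* c   where !u */
  __m512d zeroed = _mm512_maskz_fmadd_pd(u, a, b, c); /* 0.0 where !u */
  return _mm512_add_pd(keep_a, _mm512_add_pd(keep_c, zeroed));
}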
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index b4fdcc0f7e9..44c87316419 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2340,10 +2340,6 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
case X86::BI__builtin_ia32_sqrtpd512_mask:
case X86::BI__builtin_ia32_sqrtps512_mask:
- case X86::BI__builtin_ia32_vfmaddpd512:
- case X86::BI__builtin_ia32_vfmaddps512:
- case X86::BI__builtin_ia32_vfmaddsubpd512:
- case X86::BI__builtin_ia32_vfmaddsubps512:
ArgNum = 3;
HasRC = true;
break;
@@ -2378,6 +2374,22 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vfmaddss3_mask:
case X86::BI__builtin_ia32_vfmaddss3_maskz:
case X86::BI__builtin_ia32_vfmaddss3_mask3:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ case X86::BI__builtin_ia32_vfmaddps512_mask:
+ case X86::BI__builtin_ia32_vfmaddps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
ArgNum = 4;
HasRC = true;
break;
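With the mask operand inserted, the rounding immediate moves from argument index 3 to index 4, which is what the relocated cases encode (ArgNum = 4, HasRC = true). A hedged illustration: a call whose rounding argument is a bare rounding mode without _MM_FROUND_NO_EXC, as below, should now be rejected by this check as an invalid rounding argument.

#include <immintrin.h>

__m512d bad_rounding(__m512d a, __m512d b, __m512d c) {
  /* Expected to be diagnosed: _MM_FROUND_TO_ZERO (3) alone is not a
   * valid embedded-rounding immediate for these builtins. */
  return _mm512_fmadd_round_pd(a, b, c, _MM_FROUND_TO_ZERO);
}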
diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c
index cd773791a2c..a6f7e17aa6e 100644
--- a/clang/test/CodeGen/avx512f-builtins.c
+++ b/clang/test/CodeGen/avx512f-builtins.c
@@ -461,7 +461,7 @@ __m512d test_mm512_maskz_fmadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B,
// CHECK-LABEL: @test_mm512_maskz_fmadd_round_pd
// CHECK: @llvm.x86.avx512.vfmadd.pd.512
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C) {
@@ -483,7 +483,7 @@ __m512d test_mm512_maskz_fmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B,
// CHECK: fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>
// CHECK: @llvm.x86.avx512.vfmadd.pd.512
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_fnmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
@@ -505,7 +505,7 @@ __m512d test_mm512_maskz_fnmadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B,
// CHECK: fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>
// CHECK: @llvm.x86.avx512.vfmadd.pd.512
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fnmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C) {
@@ -521,7 +521,7 @@ __m512d test_mm512_maskz_fnmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B,
// CHECK: fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>
// CHECK: @llvm.x86.avx512.vfmadd.pd.512
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fnmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
@@ -547,7 +547,7 @@ __m512d test_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512
// CHECK-LABEL: @test_mm512_maskz_fmadd_pd
// CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fmadd_pd(__U, __A, __B, __C);
}
__m512d test_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
@@ -569,7 +569,7 @@ __m512d test_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512
// CHECK: fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.*}}
// CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fmsub_pd(__U, __A, __B, __C);
}
__m512d test_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
@@ -591,7 +591,7 @@ __m512d test_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m51
// CHECK: fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.*}}
// CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fnmadd_pd(__U, __A, __B, __C);
}
__m512d test_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
@@ -607,7 +607,7 @@ __m512d test_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m51
// CHECK: fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.*}}
// CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fnmsub_pd(__U, __A, __B, __C);
}
__m512 test_mm512_fmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) {
@@ -633,7 +633,7 @@ __m512 test_mm512_maskz_fmadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __
// CHECK-LABEL: @test_mm512_maskz_fmadd_round_ps
// CHECK: @llvm.x86.avx512.vfmadd.ps.512
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C) {
@@ -655,7 +655,7 @@ __m512 test_mm512_maskz_fmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __
// CHECK: fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
// CHECK: @llvm.x86.avx512.vfmadd.ps.512
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_fnmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) {
@@ -677,7 +677,7 @@ __m512 test_mm512_maskz_fnmadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, _
// CHECK: fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
// CHECK: @llvm.x86.avx512.vfmadd.ps.512
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fnmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C) {
@@ -693,7 +693,7 @@ __m512 test_mm512_maskz_fnmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, _
// CHECK: fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
// CHECK: @llvm.x86.avx512.vfmadd.ps.512
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fnmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
@@ -717,7 +717,7 @@ __m512 test_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 _
// CHECK-LABEL: @test_mm512_maskz_fmadd_ps
// CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fmadd_ps(__U, __A, __B, __C);
}
__m512 test_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
@@ -739,7 +739,7 @@ __m512 test_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 _
// CHECK: fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
// CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fmsub_ps(__U, __A, __B, __C);
}
__m512 test_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
@@ -761,7 +761,7 @@ __m512 test_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512
// CHECK: fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
// CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fnmadd_ps(__U, __A, __B, __C);
}
__m512 test_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
@@ -777,7 +777,7 @@ __m512 test_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512
// CHECK: fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
// CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fnmsub_ps(__U, __A, __B, __C);
}
__m512d test_mm512_fmaddsub_round_pd(__m512d __A, __m512d __B, __m512d __C) {
@@ -803,7 +803,7 @@ __m512d test_mm512_maskz_fmaddsub_round_pd(__mmask8 __U, __m512d __A, __m512d __
// CHECK-LABEL: @test_mm512_maskz_fmaddsub_round_pd
// CHECK: @llvm.x86.avx512.vfmaddsub.pd.512
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fmaddsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
@@ -825,7 +825,7 @@ __m512d test_mm512_maskz_fmsubadd_round_pd(__mmask8 __U, __m512d __A, __m512d __
// CHECK: fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.*}}
// CHECK: @llvm.x86.avx512.vfmaddsub.pd.512
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fmsubadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) {
@@ -863,7 +863,7 @@ __m512d test_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m
  // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
  // CHECK: select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x double> %{{.*}}, <8 x double> %{{.*}}
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fmaddsub_pd(__U, __A, __B, __C);
}
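[The fmaddsub/fmsubadd tests rely on a different blend: two fused results are combined by the constant alternating mask in the select above (<i1 true, i1 false, ...>), because the operation subtracts in even lanes and adds in odd lanes (fmsubadd swaps the two). A scalar model, as a sketch:

    #include <math.h>

    static void fmaddsub_v8(double dst[8], const double a[8],
                            const double b[8], const double c[8]) {
      for (int i = 0; i < 8; ++i)
        dst[i] = (i & 1) ? fma(a[i], b[i], c[i])            /* odd lane:  a*b + c */
                         : fma(a[i], b[i], -0.0 - c[i]);    /* even lane: a*b - c */
    }
]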
__m512d test_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) {
@@ -891,7 +891,7 @@ __m512d test_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m
// CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
// CHECK: select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x double> %{{.*}}, <8 x double> %{{.*}}
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fmsubadd_pd(__U, __A, __B, __C);
}
__m512 test_mm512_fmaddsub_round_ps(__m512 __A, __m512 __B, __m512 __C) {
@@ -917,7 +917,7 @@ __m512 test_mm512_maskz_fmaddsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B,
// CHECK-LABEL: @test_mm512_maskz_fmaddsub_round_ps
// CHECK: @llvm.x86.avx512.vfmaddsub.ps.512
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fmaddsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C) {
@@ -939,7 +939,7 @@ __m512 test_mm512_maskz_fmsubadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B,
// CHECK: fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
// CHECK: @llvm.x86.avx512.vfmaddsub.ps.512
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fmsubadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) {
@@ -977,7 +977,7 @@ __m512 test_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m51
// CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
// CHECK: select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x float> %{{.*}}, <16 x float> %{{.*}}
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fmaddsub_ps(__U, __A, __B, __C);
}
__m512 test_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) {
@@ -1005,7 +1005,7 @@ __m512 test_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m51
// CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
// CHECK: select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x float> %{{.*}}, <16 x float> %{{.*}}
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fmsubadd_ps(__U, __A, __B, __C);
}
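[The section ends as the mask3 tests begin. Unlike maskz, a mask3 intrinsic merges masked-off lanes from its third operand (the accumulator), which is why the mask follows the three vector arguments. A model of the first test below, as a sketch:

    #include <immintrin.h>

    __attribute__((target("avx512f")))
    __m512d mask3_fmsub_model(__m512d a, __m512d b, __m512d c, __mmask8 u) {
      /* lane i: ((u >> i) & 1) ? a[i]*b[i] - c[i] : c[i] */
      return _mm512_mask3_fmsub_round_pd(a, b, c, u,
                                         _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    }
]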
__m512d test_mm512_mask3_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {