Diffstat (limited to 'clang/lib')
 clang/lib/CodeGen/CGBuiltin.cpp   | 173
 clang/lib/Headers/avx512fintrin.h | 956
 clang/lib/Sema/SemaChecking.cpp   |  20
 3 files changed, 560 insertions(+), 589 deletions(-)
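For orientation before the diff itself: the patch replaces the old unmasked 512-bit FMA builtins (previously wrapped in __builtin_ia32_selectpd_512/__builtin_ia32_selectps_512 inside the header) with dedicated _mask/_maskz/_mask3 builtins, and CGBuiltin.cpp lowers them to a generic llvm.fma call followed by a select on the mask when no explicit rounding is requested. A minimal usage sketch follows; the function name fma_blend_pd and the sample signature are illustrative only and are not part of the patch.

  #include <immintrin.h>

  __attribute__((target("avx512f")))
  __m512d fma_blend_pd(__m512d a, __m512d b, __m512d c, __mmask8 k)
  {
    /* Lanes where k is set compute a*b+c; lanes where k is clear keep a.
       With this patch, and with _MM_FROUND_CUR_DIRECTION (i.e. no explicit
       rounding mode), this is expected to lower to fma(a, b, c) plus a
       select on k with a as the passthrough, rather than to a
       target-specific rounding intrinsic. */
    return _mm512_mask_fmadd_pd(a, k, b, c);
  }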
| diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a09fa7aae5e..a086b4d926c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8555,79 +8555,110 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,  // Lowers X86 FMA intrinsics to IR.  static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops, -                             unsigned BuiltinID) { +                             unsigned BuiltinID, bool IsAddSub) { -  bool IsAddSub = false; -  bool IsScalar = false; - -  // 4 operands always means rounding mode without a mask here. -  bool IsRound = Ops.size() == 4; - -  Intrinsic::ID ID; +  bool Subtract = false; +  Intrinsic::ID IID = Intrinsic::not_intrinsic;    switch (BuiltinID) {    default: break; -  case clang::X86::BI__builtin_ia32_vfmaddss3: IsScalar = true; break; -  case clang::X86::BI__builtin_ia32_vfmaddsd3: IsScalar = true; break; -  case clang::X86::BI__builtin_ia32_vfmaddps512: -    ID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break; -  case clang::X86::BI__builtin_ia32_vfmaddpd512: -    ID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break; -  case clang::X86::BI__builtin_ia32_vfmaddsubps: IsAddSub = true; break; -  case clang::X86::BI__builtin_ia32_vfmaddsubpd: IsAddSub = true; break; -  case clang::X86::BI__builtin_ia32_vfmaddsubps256: IsAddSub = true; break; -  case clang::X86::BI__builtin_ia32_vfmaddsubpd256: IsAddSub = true; break; -  case clang::X86::BI__builtin_ia32_vfmaddsubps512: { -    ID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512; -    IsAddSub = true; +  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: +    Subtract = true; +    LLVM_FALLTHROUGH; +  case clang::X86::BI__builtin_ia32_vfmaddps512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: +    IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break; +  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: +    Subtract = true; +    LLVM_FALLTHROUGH; +  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: +    IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break; +  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: +    Subtract = true; +    LLVM_FALLTHROUGH; +  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: +    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;      break; -  } -  case clang::X86::BI__builtin_ia32_vfmaddsubpd512: { -    ID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512; -    IsAddSub = true; +  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: +    Subtract = true; +    LLVM_FALLTHROUGH; +  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: +    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;      break;    } -  } - -  // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding). 
-  if (IsRound) { -    Function *Intr = CGF.CGM.getIntrinsic(ID); -    if (cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != (uint64_t)4) -      return CGF.Builder.CreateCall(Intr, Ops); -  }    Value *A = Ops[0];    Value *B = Ops[1];    Value *C = Ops[2]; -  if (IsScalar) { -    A = CGF.Builder.CreateExtractElement(A, (uint64_t)0); -    B = CGF.Builder.CreateExtractElement(B, (uint64_t)0); -    C = CGF.Builder.CreateExtractElement(C, (uint64_t)0); -  } +  if (Subtract) +    C = CGF.Builder.CreateFNeg(C); -  llvm::Type *Ty = A->getType(); -  Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); -  Value *Res = CGF.Builder.CreateCall(FMA, {A, B, C} ); +  Value *Res; -  if (IsScalar) -    return CGF.Builder.CreateInsertElement(Ops[0], Res, (uint64_t)0); +  // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding). +  if (IID != Intrinsic::not_intrinsic && +      cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) { +    Function *Intr = CGF.CGM.getIntrinsic(IID); +    Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() }); +  } else { +    llvm::Type *Ty = A->getType(); +    Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); +    Res = CGF.Builder.CreateCall(FMA, {A, B, C} ); + +    if (IsAddSub) { +      // Negate even elts in C using a mask. +      unsigned NumElts = Ty->getVectorNumElements(); +      SmallVector<Constant *, 16> NMask; +      Constant *Zero = ConstantInt::get(CGF.Builder.getInt1Ty(), 0); +      Constant *One = ConstantInt::get(CGF.Builder.getInt1Ty(), 1); +      for (unsigned i = 0; i < NumElts; ++i) { +        NMask.push_back(i % 2 == 0 ? One : Zero); +      } +      Value *NegMask = ConstantVector::get(NMask); -  if (IsAddSub) { -    // Negate even elts in C using a mask. -    unsigned NumElts = Ty->getVectorNumElements(); -    SmallVector<Constant *, 16> NMask; -    Constant *Zero = ConstantInt::get(CGF.Builder.getInt1Ty(), 0); -    Constant *One = ConstantInt::get(CGF.Builder.getInt1Ty(), 1); -    for (unsigned i = 0; i < NumElts; ++i) { -      NMask.push_back(i % 2 == 0 ? One : Zero); +      Value *NegC = CGF.Builder.CreateFNeg(C); +      Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} ); +      Res = CGF.Builder.CreateSelect(NegMask, FMSub, Res);      } -    Value *NegMask = ConstantVector::get(NMask); +  } -    Value *NegC = CGF.Builder.CreateFNeg(C); -    Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} ); -    Res = CGF.Builder.CreateSelect(NegMask, FMSub, Res); +  // Handle any required masking. 
+  Value *MaskFalseVal = nullptr; +  switch (BuiltinID) { +  case clang::X86::BI__builtin_ia32_vfmaddps512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: +    MaskFalseVal = Ops[0]; +    break; +  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: +    MaskFalseVal = Constant::getNullValue(Ops[0]->getType()); +    break; +  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: +  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: +  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: +  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: +  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: +  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: +  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: +  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: +    MaskFalseVal = Ops[2]; +    break;    } +  if (MaskFalseVal) +    return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal); +    return Res;  } @@ -9046,20 +9077,40 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,      return EmitX86ConvertToMask(*this, Ops[0]);    case X86::BI__builtin_ia32_vfmaddss3: -  case X86::BI__builtin_ia32_vfmaddsd3: +  case X86::BI__builtin_ia32_vfmaddsd3: { +    Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0); +    Value *B = Builder.CreateExtractElement(Ops[1], (uint64_t)0); +    Value *C = Builder.CreateExtractElement(Ops[2], (uint64_t)0); +    Function *FMA = CGM.getIntrinsic(Intrinsic::fma, A->getType()); +    Value *Res = Builder.CreateCall(FMA, {A, B, C} ); +    return Builder.CreateInsertElement(Ops[0], Res, (uint64_t)0); +  }    case X86::BI__builtin_ia32_vfmaddps:    case X86::BI__builtin_ia32_vfmaddpd:    case X86::BI__builtin_ia32_vfmaddps256:    case X86::BI__builtin_ia32_vfmaddpd256: -  case X86::BI__builtin_ia32_vfmaddps512: -  case X86::BI__builtin_ia32_vfmaddpd512: +  case X86::BI__builtin_ia32_vfmaddps512_mask: +  case X86::BI__builtin_ia32_vfmaddps512_maskz: +  case X86::BI__builtin_ia32_vfmaddps512_mask3: +  case X86::BI__builtin_ia32_vfmsubps512_mask3: +  case X86::BI__builtin_ia32_vfmaddpd512_mask: +  case X86::BI__builtin_ia32_vfmaddpd512_maskz: +  case X86::BI__builtin_ia32_vfmaddpd512_mask3: +  case X86::BI__builtin_ia32_vfmsubpd512_mask3: +    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);    case X86::BI__builtin_ia32_vfmaddsubps:    case X86::BI__builtin_ia32_vfmaddsubpd:    case X86::BI__builtin_ia32_vfmaddsubps256:    case X86::BI__builtin_ia32_vfmaddsubpd256: -  case X86::BI__builtin_ia32_vfmaddsubps512: -  case X86::BI__builtin_ia32_vfmaddsubpd512: -    return EmitX86FMAExpr(*this, Ops, BuiltinID); +  case X86::BI__builtin_ia32_vfmaddsubps512_mask: +  case X86::BI__builtin_ia32_vfmaddsubps512_maskz: +  case X86::BI__builtin_ia32_vfmaddsubps512_mask3: +  case X86::BI__builtin_ia32_vfmsubaddps512_mask3: +  case X86::BI__builtin_ia32_vfmaddsubpd512_mask: +  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz: +  case X86::BI__builtin_ia32_vfmaddsubpd512_mask3: +  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: +    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);    case X86::BI__builtin_ia32_movdqa32store128_mask:    case X86::BI__builtin_ia32_movdqa64store128_mask: diff --git 
a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 2b3633e377b..47a0c9d433f 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -2578,910 +2578,818 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {                                            (__mmask8)-1, (int)(R))  #define _mm512_fmadd_round_pd(A, B, C, R) \ -  (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ -                                      (__v8df)(__m512d)(B), \ -                                      (__v8df)(__m512d)(C), (int)(R)) +  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ +                                           (__v8df)(__m512d)(B), \ +                                           (__v8df)(__m512d)(C), \ +                                           (__mmask8)-1, (int)(R))  #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \ -  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ -            __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ -                                       (__v8df)(__m512d)(B), \ -                                       (__v8df)(__m512d)(C), \ -                                       (int)(R)), \ -            (__v8df)(__m512d)(A)) +  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ +                                           (__v8df)(__m512d)(B), \ +                                           (__v8df)(__m512d)(C), \ +                                           (__mmask8)(U), (int)(R))  #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \ -  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ -            __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ -                                       (__v8df)(__m512d)(B), \ -                                       (__v8df)(__m512d)(C), \ -                                       (int)(R)), \ -            (__v8df)(__m512d)(C)) +  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \ +                                            (__v8df)(__m512d)(B), \ +                                            (__v8df)(__m512d)(C), \ +                                            (__mmask8)(U), (int)(R))  #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \ -  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ -            __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ -                                       (__v8df)(__m512d)(B), \ -                                       (__v8df)(__m512d)(C), \ -                                       (int)(R)), \ -            (__v8df)_mm512_setzero_pd()) +  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ +                                            (__v8df)(__m512d)(B), \ +                                            (__v8df)(__m512d)(C), \ +                                            (__mmask8)(U), (int)(R))  #define _mm512_fmsub_round_pd(A, B, C, R) \ -  (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ -                                      (__v8df)(__m512d)(B), \ -                                      -(__v8df)(__m512d)(C), \ -                                      (int)(R)) +  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ +                                           (__v8df)(__m512d)(B), \ +                                           -(__v8df)(__m512d)(C), \ +                                           (__mmask8)-1, (int)(R))  #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \ -  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ -            
__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ -                                       (__v8df)(__m512d)(B), \ -                                       -(__v8df)(__m512d)(C), \ -                                       (int)(R)), \ -            (__v8df)(__m512d)(A)) +  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ +                                           (__v8df)(__m512d)(B), \ +                                           -(__v8df)(__m512d)(C), \ +                                           (__mmask8)(U), (int)(R))  #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \ -  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ -            __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ -                                       (__v8df)(__m512d)(B), \ -                                       -(__v8df)(__m512d)(C), \ -                                       (int)(R)), \ -            (__v8df)_mm512_setzero_pd()) +  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ +                                            (__v8df)(__m512d)(B), \ +                                            -(__v8df)(__m512d)(C), \ +                                            (__mmask8)(U), (int)(R))  #define _mm512_fnmadd_round_pd(A, B, C, R) \ -  (__m512d)__builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \ -                                      (__v8df)(__m512d)(B), \ -                                      (__v8df)(__m512d)(C), (int)(R)) +  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ +                                           (__v8df)(__m512d)(B), \ +                                           (__v8df)(__m512d)(C), \ +                                           (__mmask8)-1, (int)(R))  #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \ -  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ -            __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \ -                                       (__v8df)(__m512d)(B), \ -                                       (__v8df)(__m512d)(C), \ -                                       (int)(R)), \ -            (__v8df)(__m512d)(C)) +  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \ +                                            (__v8df)(__m512d)(B), \ +                                            (__v8df)(__m512d)(C), \ +                                            (__mmask8)(U), (int)(R))  #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \ -  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ -            __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \ -                                       (__v8df)(__m512d)(B), \ -                                       (__v8df)(__m512d)(C), \ -                                       (int)(R)), \ -            (__v8df)_mm512_setzero_pd()) +  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ +                                            (__v8df)(__m512d)(B), \ +                                            (__v8df)(__m512d)(C), \ +                                            (__mmask8)(U), (int)(R))  #define _mm512_fnmsub_round_pd(A, B, C, R) \ -  (__m512d)__builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \ -                                      (__v8df)(__m512d)(B), \ -                                      -(__v8df)(__m512d)(C), \ -                                      (int)(R)) +  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ +                                           (__v8df)(__m512d)(B), \ +                                           
-(__v8df)(__m512d)(C), \ +                                           (__mmask8)-1, (int)(R))  #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \ -  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ -            __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \ -                                       (__v8df)(__m512d)(B), \ -                                       -(__v8df)(__m512d)(C), \ -                                       (int)(R)), \ -            (__v8df)_mm512_setzero_pd()) +  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ +                                            (__v8df)(__m512d)(B), \ +                                            -(__v8df)(__m512d)(C), \ +                                            (__mmask8)(U), (int)(R))  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)  { -  return (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A, -                                               (__v8df) __B, -                                               (__v8df) __C, -                                               _MM_FROUND_CUR_DIRECTION); +  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, +                                                    (__v8df) __B, +                                                    (__v8df) __C, +                                                    (__mmask8) -1, +                                                    _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)  { -  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U, -                    __builtin_ia32_vfmaddpd512 ((__v8df) __A, -                                                (__v8df) __B, -                                                (__v8df) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v8df) __A); +  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, +                                                    (__v8df) __B, +                                                    (__v8df) __C, +                                                    (__mmask8) __U, +                                                    _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)  { -  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U, -                    __builtin_ia32_vfmaddpd512 ((__v8df) __A, -                                                (__v8df) __B, -                                                (__v8df) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v8df) __C); +  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, +                                                     (__v8df) __B, +                                                     (__v8df) __C, +                                                     (__mmask8) __U, +                                                     _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)  { -  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U, -                    __builtin_ia32_vfmaddpd512 ((__v8df) __A, -                                                (__v8df) __B, -                                                (__v8df) __C, -           
                                     _MM_FROUND_CUR_DIRECTION), -            (__v8df) _mm512_setzero_pd()); +  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, +                                                     (__v8df) __B, +                                                     (__v8df) __C, +                                                     (__mmask8) __U, +                                                     _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)  { -  return (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A, -                                               (__v8df) __B, -                                               -(__v8df) __C, -                                               _MM_FROUND_CUR_DIRECTION); +  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, +                                                    (__v8df) __B, +                                                    -(__v8df) __C, +                                                    (__mmask8) -1, +                                                    _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)  { -  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U, -                    __builtin_ia32_vfmaddpd512 ((__v8df) __A, -                                                (__v8df) __B, -                                                -(__v8df) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v8df) __A); +  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, +                                                    (__v8df) __B, +                                                    -(__v8df) __C, +                                                    (__mmask8) __U, +                                                    _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)  { -  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U, -                    __builtin_ia32_vfmaddpd512 ((__v8df) __A, -                                                (__v8df) __B, -                                                -(__v8df) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v8df) _mm512_setzero_pd()); +  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, +                                                     (__v8df) __B, +                                                     -(__v8df) __C, +                                                     (__mmask8) __U, +                                                     _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)  { -  return (__m512d) __builtin_ia32_vfmaddpd512 (-(__v8df) __A, -                                               (__v8df) __B, -                                               (__v8df) __C, -                                               _MM_FROUND_CUR_DIRECTION); +  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, +                                                    -(__v8df) __B, +                                                    (__v8df) __C, +                                                    (__mmask8) -1, +                                    
                _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)  { -  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U, -                    __builtin_ia32_vfmaddpd512 (-(__v8df) __A, -                                                (__v8df) __B, -                                                (__v8df) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v8df) __C); +  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, +                                                     (__v8df) __B, +                                                     (__v8df) __C, +                                                     (__mmask8) __U, +                                                     _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)  { -  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U, -                    __builtin_ia32_vfmaddpd512 (-(__v8df) __A, -                                                (__v8df) __B, -                                                (__v8df) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v8df) _mm512_setzero_pd()); +  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, +                                                     (__v8df) __B, +                                                     (__v8df) __C, +                                                     (__mmask8) __U, +                                                     _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)  { -  return (__m512d) __builtin_ia32_vfmaddpd512 (-(__v8df) __A, -                                               (__v8df) __B, -                                               -(__v8df) __C, -                                               _MM_FROUND_CUR_DIRECTION); +  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, +                                                    -(__v8df) __B, +                                                    -(__v8df) __C, +                                                    (__mmask8) -1, +                                                    _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)  { -  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U, -                    __builtin_ia32_vfmaddpd512 (-(__v8df) __A, -                                                (__v8df) __B, -                                                -(__v8df) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v8df) _mm512_setzero_pd()); +  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, +                                                     (__v8df) __B, +                                                     -(__v8df) __C, +                                                     (__mmask8) __U, +                                                     _MM_FROUND_CUR_DIRECTION);  }  #define _mm512_fmadd_round_ps(A, B, C, R) \ -  (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ -                                     (__v16sf)(__m512)(B), \ -                                     
(__v16sf)(__m512)(C), (int)(R)) +  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ +                                          (__v16sf)(__m512)(B), \ +                                          (__v16sf)(__m512)(C), \ +                                          (__mmask16)-1, (int)(R))  #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \ -  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ -           __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ -                                      (__v16sf)(__m512)(B), \ -                                      (__v16sf)(__m512)(C), \ -                                      (int)(R)), \ -            (__v16sf)(__m512)(A)) +  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ +                                          (__v16sf)(__m512)(B), \ +                                          (__v16sf)(__m512)(C), \ +                                          (__mmask16)(U), (int)(R))  #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \ -  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ -           __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ -                                      (__v16sf)(__m512)(B), \ -                                      (__v16sf)(__m512)(C), \ -                                      (int)(R)), \ -            (__v16sf)(__m512)(C)) +  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \ +                                           (__v16sf)(__m512)(B), \ +                                           (__v16sf)(__m512)(C), \ +                                           (__mmask16)(U), (int)(R))  #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \ -  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ -           __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ -                                      (__v16sf)(__m512)(B), \ -                                      (__v16sf)(__m512)(C), \ -                                      (int)(R)), \ -            (__v16sf)_mm512_setzero_ps()) +  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ +                                           (__v16sf)(__m512)(B), \ +                                           (__v16sf)(__m512)(C), \ +                                           (__mmask16)(U), (int)(R))  #define _mm512_fmsub_round_ps(A, B, C, R) \ -  (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ -                                     (__v16sf)(__m512)(B), \ -                                     -(__v16sf)(__m512)(C), \ -                                     (int)(R)) +  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ +                                          (__v16sf)(__m512)(B), \ +                                          -(__v16sf)(__m512)(C), \ +                                          (__mmask16)-1, (int)(R))  #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \ -  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ -           __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ -                                      (__v16sf)(__m512)(B), \ -                                      -(__v16sf)(__m512)(C), \ -                                      (int)(R)), \ -            (__v16sf)(__m512)(A)) +  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ +                                          (__v16sf)(__m512)(B), \ +                                          -(__v16sf)(__m512)(C), \ +                                          (__mmask16)(U), (int)(R))  #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \ -  
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ -           __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ -                                      (__v16sf)(__m512)(B), \ -                                      -(__v16sf)(__m512)(C), \ -                                      (int)(R)), \ -            (__v16sf)_mm512_setzero_ps()) +  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ +                                           (__v16sf)(__m512)(B), \ +                                           -(__v16sf)(__m512)(C), \ +                                           (__mmask16)(U), (int)(R))  #define _mm512_fnmadd_round_ps(A, B, C, R) \ -  (__m512)__builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \ -                                     (__v16sf)(__m512)(B), \ -                                     (__v16sf)(__m512)(C), (int)(R)) +  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ +                                          -(__v16sf)(__m512)(B), \ +                                          (__v16sf)(__m512)(C), \ +                                          (__mmask16)-1, (int)(R))  #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \ -  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ -           __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \ -                                      (__v16sf)(__m512)(B), \ -                                      (__v16sf)(__m512)(C), \ -                                      (int)(R)), \ -            (__v16sf)(__m512)(C)) +  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \ +                                           (__v16sf)(__m512)(B), \ +                                           (__v16sf)(__m512)(C), \ +                                           (__mmask16)(U), (int)(R))  #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \ -  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ -           __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \ -                                      (__v16sf)(__m512)(B), \ -                                      (__v16sf)(__m512)(C), \ -                                      (int)(R)), \ -            (__v16sf)_mm512_setzero_ps()) +  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ +                                           (__v16sf)(__m512)(B), \ +                                           (__v16sf)(__m512)(C), \ +                                           (__mmask16)(U), (int)(R))  #define _mm512_fnmsub_round_ps(A, B, C, R) \ -  (__m512)__builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \ -                                     (__v16sf)(__m512)(B), \ -                                     -(__v16sf)(__m512)(C), \ -                                     (int)(R)) +  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ +                                          -(__v16sf)(__m512)(B), \ +                                          -(__v16sf)(__m512)(C), \ +                                          (__mmask16)-1, (int)(R))  #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ -  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ -           __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \ -                                      (__v16sf)(__m512)(B), \ -                                      -(__v16sf)(__m512)(C), \ -                                      (int)(R)), \ -            (__v16sf)_mm512_setzero_ps()) +  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ +                                           (__v16sf)(__m512)(B), \ +     
                                      -(__v16sf)(__m512)(C), \ +                                           (__mmask16)(U), (int)(R))  static __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)  { -  return (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A, -                                              (__v16sf) __B, -                                              (__v16sf) __C, -                                              _MM_FROUND_CUR_DIRECTION); +  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, +                                                   (__v16sf) __B, +                                                   (__v16sf) __C, +                                                   (__mmask16) -1, +                                                   _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)  { -  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U, -                    __builtin_ia32_vfmaddps512 ((__v16sf) __A, -                                                (__v16sf) __B, -                                                (__v16sf) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v16sf) __A); +  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, +                                                   (__v16sf) __B, +                                                   (__v16sf) __C, +                                                   (__mmask16) __U, +                                                   _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)  { -  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U, -                    __builtin_ia32_vfmaddps512 ((__v16sf) __A, -                                                (__v16sf) __B, -                                                (__v16sf) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v16sf) __C); +  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, +                                                    (__v16sf) __B, +                                                    (__v16sf) __C, +                                                    (__mmask16) __U, +                                                    _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)  { -  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U, -                    __builtin_ia32_vfmaddps512 ((__v16sf) __A, -                                                (__v16sf) __B, -                                                (__v16sf) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v16sf) _mm512_setzero_ps()); +  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, +                                                    (__v16sf) __B, +                                                    (__v16sf) __C, +                                                    (__mmask16) __U, +                                                    _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)  { -  return (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A, -                
                              (__v16sf) __B, -                                              -(__v16sf) __C, -                                              _MM_FROUND_CUR_DIRECTION); +  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, +                                                   (__v16sf) __B, +                                                   -(__v16sf) __C, +                                                   (__mmask16) -1, +                                                   _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)  { -  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U, -                    __builtin_ia32_vfmaddps512 ((__v16sf) __A, -                                                (__v16sf) __B, -                                                -(__v16sf) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v16sf) __A); +  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, +                                                   (__v16sf) __B, +                                                   -(__v16sf) __C, +                                                   (__mmask16) __U, +                                                   _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)  { -  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U, -                    __builtin_ia32_vfmaddps512 ((__v16sf) __A, -                                                (__v16sf) __B, -                                                -(__v16sf) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v16sf) _mm512_setzero_ps()); +  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, +                                                    (__v16sf) __B, +                                                    -(__v16sf) __C, +                                                    (__mmask16) __U, +                                                    _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)  { -  return (__m512) __builtin_ia32_vfmaddps512 (-(__v16sf) __A, -                                              (__v16sf) __B, -                                              (__v16sf) __C, -                                              _MM_FROUND_CUR_DIRECTION); +  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, +                                                   -(__v16sf) __B, +                                                   (__v16sf) __C, +                                                   (__mmask16) -1, +                                                   _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)  { -  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U, -                    __builtin_ia32_vfmaddps512 (-(__v16sf) __A, -                                                (__v16sf) __B, -                                                (__v16sf) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v16sf) __C); +  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, +                                                    
(__v16sf) __B, +                                                    (__v16sf) __C, +                                                    (__mmask16) __U, +                                                    _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)  { -  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U, -                    __builtin_ia32_vfmaddps512 (-(__v16sf) __A, -                                                (__v16sf) __B, -                                                (__v16sf) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v16sf) _mm512_setzero_ps()); +  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, +                                                    (__v16sf) __B, +                                                    (__v16sf) __C, +                                                    (__mmask16) __U, +                                                    _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)  { -  return (__m512) __builtin_ia32_vfmaddps512 (-(__v16sf) __A, -                                              (__v16sf) __B, -                                              -(__v16sf) __C, -                                              _MM_FROUND_CUR_DIRECTION); +  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, +                                                   -(__v16sf) __B, +                                                   -(__v16sf) __C, +                                                   (__mmask16) -1, +                                                   _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)  { -  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U, -                    __builtin_ia32_vfmaddps512 (-(__v16sf) __A, -                                                (__v16sf) __B, -                                                -(__v16sf) __C, -                                                _MM_FROUND_CUR_DIRECTION), -            (__v16sf) _mm512_setzero_ps()); +  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, +                                                    (__v16sf) __B, +                                                    -(__v16sf) __C, +                                                    (__mmask16) __U, +                                                    _MM_FROUND_CUR_DIRECTION);  }  #define _mm512_fmaddsub_round_pd(A, B, C, R) \ -  (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ -                                         (__v8df)(__m512d)(B), \ -                                         (__v8df)(__m512d)(C), \ -                                         (int)(R)) +  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ +                                              (__v8df)(__m512d)(B), \ +                                              (__v8df)(__m512d)(C), \ +                                              (__mmask8)-1, (int)(R))  #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \ -  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ -            __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ -                                          (__v8df)(__m512d)(B), \ -                                          
(__v8df)(__m512d)(C), \ -                                          (int)(R)), \ -            (__v8df)(__m512d)(A)) +  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ +                                              (__v8df)(__m512d)(B), \ +                                              (__v8df)(__m512d)(C), \ +                                              (__mmask8)(U), (int)(R))  #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \ -  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ -            __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ -                                          (__v8df)(__m512d)(B), \ -                                          (__v8df)(__m512d)(C), \ -                                          (int)(R)), \ -            (__v8df)(__m512d)(C)) +  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \ +                                               (__v8df)(__m512d)(B), \ +                                               (__v8df)(__m512d)(C), \ +                                               (__mmask8)(U), (int)(R))  #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \ -  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ -            __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ -                                          (__v8df)(__m512d)(B), \ -                                          (__v8df)(__m512d)(C), \ -                                          (int)(R)), \ -            (__v8df)_mm512_setzero_pd()) +  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ +                                               (__v8df)(__m512d)(B), \ +                                               (__v8df)(__m512d)(C), \ +                                               (__mmask8)(U), (int)(R))  #define _mm512_fmsubadd_round_pd(A, B, C, R) \ -  (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ -                                         (__v8df)(__m512d)(B), \ -                                         -(__v8df)(__m512d)(C), \ -                                         (int)(R)) +  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ +                                              (__v8df)(__m512d)(B), \ +                                              -(__v8df)(__m512d)(C), \ +                                              (__mmask8)-1, (int)(R))  #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \ -  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ -            __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ -                                          (__v8df)(__m512d)(B), \ -                                          -(__v8df)(__m512d)(C), \ -                                          (int)(R)), \ -            (__v8df)(__m512d)(A)) +  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ +                                              (__v8df)(__m512d)(B), \ +                                              -(__v8df)(__m512d)(C), \ +                                              (__mmask8)(U), (int)(R))  #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \ -  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ -            __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ -                                          (__v8df)(__m512d)(B), \ -                                          -(__v8df)(__m512d)(C), \ -                                          (int)(R)), \ -            (__v8df)_mm512_setzero_pd()) +  
(__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ +                                               (__v8df)(__m512d)(B), \ +                                               -(__v8df)(__m512d)(C), \ +                                               (__mmask8)(U), (int)(R))  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)  { -  return (__m512d) __builtin_ia32_vfmaddsubpd512 ((__v8df) __A, -                                                  (__v8df) __B, -                                                  (__v8df) __C, -                                                  _MM_FROUND_CUR_DIRECTION); +  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, +                                                      (__v8df) __B, +                                                      (__v8df) __C, +                                                      (__mmask8) -1, +                                                      _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)  { -  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U, -                    __builtin_ia32_vfmaddsubpd512 ((__v8df) __A, -                                                   (__v8df) __B, -                                                   (__v8df) __C, -                                                   _MM_FROUND_CUR_DIRECTION), -            (__v8df) __A); +  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, +                                                      (__v8df) __B, +                                                      (__v8df) __C, +                                                      (__mmask8) __U, +                                                      _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)  { -  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U, -                    __builtin_ia32_vfmaddsubpd512 ((__v8df) __A, -                                                   (__v8df) __B, -                                                   (__v8df) __C, -                                                   _MM_FROUND_CUR_DIRECTION), -            (__v8df) __C); +  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, +                                                       (__v8df) __B, +                                                       (__v8df) __C, +                                                       (__mmask8) __U, +                                                       _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)  { -  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U, -                    __builtin_ia32_vfmaddsubpd512 ((__v8df) __A, -                                                   (__v8df) __B, -                                                   (__v8df) __C, -                                                   _MM_FROUND_CUR_DIRECTION), -            (__v8df) _mm512_setzero_pd()); +  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, +                                                       (__v8df) __B, +                                                       (__v8df) __C, +                                                       (__mmask8) __U, + 
                                                      _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
-                                                  (__v8df) __B,
-                                                  -(__v8df) __C,
-                                                  _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+                                                       (__v8df) __B,
+                                                       -(__v8df) __C,
+                                                       (__mmask8) -1,
+                                                       _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
-                    __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
-                                                   (__v8df) __B,
-                                                   -(__v8df) __C,
-                                                   _MM_FROUND_CUR_DIRECTION),
-            (__v8df) __A);
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+                                                       (__v8df) __B,
+                                                       -(__v8df) __C,
+                                                       (__mmask8) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
-                    __builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
-                                                   (__v8df) __B,
-                                                   -(__v8df) __C,
-                                                   _MM_FROUND_CUR_DIRECTION),
-            (__v8df) _mm512_setzero_pd());
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
+                                                        (__v8df) __B,
+                                                        -(__v8df) __C,
+                                                        (__mmask8) __U,
+                                                        _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
-  (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
-                                        (__v16sf)(__m512)(B), \
-                                        (__v16sf)(__m512)(C), \
-                                        (int)(R))
+  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+                                             (__v16sf)(__m512)(B), \
+                                             (__v16sf)(__m512)(C), \
+                                             (__mmask16)-1, (int)(R))

 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
-  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
-           __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
-                                         (__v16sf)(__m512)(B), \
-                                         (__v16sf)(__m512)(C), \
-                                         (int)(R)), \
-            (__v16sf)(__m512)(A))
+  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+                                             (__v16sf)(__m512)(B), \
+                                             (__v16sf)(__m512)(C), \
+                                             (__mmask16)(U), (int)(R))

 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
-  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
-           __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
-                                         (__v16sf)(__m512)(B), \
-                                         (__v16sf)(__m512)(C), \
-                                         (int)(R)), \
-            (__v16sf)(__m512)(C))
+  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
+                                              (__v16sf)(__m512)(B), \
+                                              (__v16sf)(__m512)(C), \
+                                              (__mmask16)(U), (int)(R))

 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
-  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
-           __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
-                                         (__v16sf)(__m512)(B), \
-                                         (__v16sf)(__m512)(C), \
-                                         (int)(R)), \
-            (__v16sf)_mm512_setzero_ps())
+  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
+                                              (__v16sf)(__m512)(B), \
+                                              (__v16sf)(__m512)(C), \
+                                              (__mmask16)(U), (int)(R))

 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
-  (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
-                                        (__v16sf)(__m512)(B), \
-                                        -(__v16sf)(__m512)(C), \
-                                        (int)(R))
+  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+                                             (__v16sf)(__m512)(B), \
+                                             -(__v16sf)(__m512)(C), \
+                                             (__mmask16)-1, (int)(R))

 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
-  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
-           __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
-                                         (__v16sf)(__m512)(B), \
-                                         -(__v16sf)(__m512)(C), \
-                                         (int)(R)), \
-            (__v16sf)(__m512)(A))
+  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+                                             (__v16sf)(__m512)(B), \
+                                             -(__v16sf)(__m512)(C), \
+                                             (__mmask16)(U), (int)(R))

 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
-  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
-           __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
-                                         (__v16sf)(__m512)(B), \
-                                         -(__v16sf)(__m512)(C), \
-                                         (int)(R)), \
-            (__v16sf)_mm512_setzero_ps())
+  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
+                                              (__v16sf)(__m512)(B), \
+                                              -(__v16sf)(__m512)(C), \
+                                              (__mmask16)(U), (int)(R))

 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
-                                                 (__v16sf) __B,
-                                                 (__v16sf) __C,
-                                                 _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+                                                      (__v16sf) __B,
+                                                      (__v16sf) __C,
+                                                      (__mmask16) -1,
+                                                      _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
-                    __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   (__v16sf) __C,
-                                                   _MM_FROUND_CUR_DIRECTION),
-            (__v16sf) __A);
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+                                                      (__v16sf) __B,
+                                                      (__v16sf) __C,
+                                                      (__mmask16) __U,
+                                                      _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
 {
-  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
-                    __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   (__v16sf) __C,
-                                                   _MM_FROUND_CUR_DIRECTION),
-            (__v16sf) __C);
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
+                                                       (__v16sf) __B,
+                                                       (__v16sf) __C,
+                                                       (__mmask16) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
-                    __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   (__v16sf) __C,
-                                                   _MM_FROUND_CUR_DIRECTION),
-            (__v16sf) _mm512_setzero_ps());
+  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+                                                       (__v16sf) __B,
+                                                       (__v16sf) __C,
+                                                       (__mmask16) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
-                                                 (__v16sf) __B,
-                                                 -(__v16sf) __C,
-                                                 _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+                                                      (__v16sf) __B,
+                                                      -(__v16sf) __C,
+                                                      (__mmask16) -1,
+                                                      _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
-                    __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   -(__v16sf) __C,
-                                                   _MM_FROUND_CUR_DIRECTION),
-            (__v16sf) __A);
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+                                                      (__v16sf) __B,
+                                                      -(__v16sf) __C,
+                                                      (__mmask16) __U,
+                                                      _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
-                    __builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   -(__v16sf) __C,
-                                                   _MM_FROUND_CUR_DIRECTION),
-            (__v16sf) _mm512_setzero_ps());
+  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+                                                       (__v16sf) __B,
+                                                       -(__v16sf) __C,
+                                                       (__mmask16) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
-  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
-                    (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
-                                                        (__v8df)(__m512d)(B), \
-                                                        -(__v8df)(__m512d)(C), \
-                                                        (int)(R)), \
-                    (__v8df)(__m512d)(C))
+  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
+                                            (__v8df)(__m512d)(B), \
+                                            (__v8df)(__m512d)(C), \
+                                            (__mmask8)(U), (int)(R))

 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
 {
-  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
-                    (__m512d)__builtin_ia32_vfmaddpd512 ((__v8df) __A,
-                                                         (__v8df) __B,
-                                                         -(__v8df) __C,
-                                                         _MM_FROUND_CUR_DIRECTION),
-                    (__v8df) __C);
+  return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
+                                                    (__v8df) __B,
+                                                    (__v8df) __C,
+                                                    (__mmask8) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
-  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
-                   (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \
-                                                      (__v16sf)(__m512)(B), \
-                                                      -(__v16sf)(__m512)(C), \
-                                                      (int)(R)), \
-                   (__v16sf)(__m512)(C))
+  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
+                                           (__v16sf)(__m512)(B), \
+                                           (__v16sf)(__m512)(C), \
+                                           (__mmask16)(U), (int)(R))

 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
 {
-  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
-                    (__m512)__builtin_ia32_vfmaddps512 ((__v16sf) __A,
-                                                        (__v16sf) __B,
-                                                        -(__v16sf) __C,
-                                                        _MM_FROUND_CUR_DIRECTION),
-                    (__v16sf) __C);
+  return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   (__v16sf) __C,
+                                                   (__mmask16) __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
-  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
-                    (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \
-                                                           (__v8df)(__m512d)(B), \
-                                                           -(__v8df)(__m512d)(C), \
-                                                           (int)(R)), \
-                    (__v8df)(__m512d)(C))
+  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
+                                               (__v8df)(__m512d)(B), \
+                                               (__v8df)(__m512d)(C), \
+                                               (__mmask8)(U), (int)(R))

 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
 {
-  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
-                    (__m512d)__builtin_ia32_vfmaddsubpd512 ((__v8df) __A,
-                                                            (__v8df) __B,
-                                                            -(__v8df) __C,
-                                                            _MM_FROUND_CUR_DIRECTION),
-                    (__v8df) __C);
+  return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
+                                                       (__v8df) __B,
+                                                       (__v8df) __C,
+                                                       (__mmask8) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
-  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
-                    (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \
-                                                          (__v16sf)(__m512)(B), \
-                                                          -(__v16sf)(__m512)(C), \
-                                                          (int)(R)), \
-                    (__v16sf)(__m512)(C))
+  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
+                                              (__v16sf)(__m512)(B), \
+                                              (__v16sf)(__m512)(C), \
+                                              (__mmask16)(U), (int)(R))

 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
 {
-  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
-                    (__m512)__builtin_ia32_vfmaddsubps512 ((__v16sf) __A,
-                                                           (__v16sf) __B,
-                                                           -(__v16sf) __C,
-                                                           _MM_FROUND_CUR_DIRECTION),
-                    (__v16sf) __C);
+  return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
+                                                      (__v16sf) __B,
+                                                      (__v16sf) __C,
+                                                      (__mmask16) __U,
+                                                      _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
-  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
-            __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \
-                                       (__v8df)(__m512d)(B), \
-                                       (__v8df)(__m512d)(C), \
-                                       (int)(R)), \
-            (__v8df)(__m512d)(A))
+  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+                                           -(__v8df)(__m512d)(B), \
+                                           (__v8df)(__m512d)(C), \
+                                           (__mmask8)(U), (int)(R))

 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_selectpd_512((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd512 (-(__v8df) __A,
-                                                (__v8df) __B,
-                                                (__v8df) __C,
-                                                _MM_FROUND_CUR_DIRECTION),
-            (__v8df) __A);
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+                                                    -(__v8df) __B,
+                                                    (__v8df) __C,
+                                                    (__mmask8) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
-  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
-            __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \
-                                      (__v16sf)(__m512)(B), \
-                                      (__v16sf)(__m512)(C), \
-                                      (int)(R)), \
-            (__v16sf)(__m512)(A))
+  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+                                          -(__v16sf)(__m512)(B), \
+                                          (__v16sf)(__m512)(C), \
+                                          (__mmask16)(U), (int)(R))

 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
-                    __builtin_ia32_vfmaddps512 (-(__v16sf) __A,
-                                                (__v16sf) __B,
-                                                (__v16sf) __C,
-                                                _MM_FROUND_CUR_DIRECTION),
-            (__v16sf) __A);
+  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+                                                   -(__v16sf) __B,
+                                                   (__v16sf) __C,
+                                                   (__mmask16) __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
-  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
-            (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
-                                                -(__v8df)(__m512d)(B), \
-                                                -(__v8df)(__m512d)(C), \
-                                                (int)(R)), \
-            (__v8df)(__m512d)(A))
+  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+                                           -(__v8df)(__m512d)(B), \
+                                           -(__v8df)(__m512d)(C), \
+                                           (__mmask8)(U), (int)(R))

 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
-  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
-            (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \
-                                                -(__v8df)(__m512d)(B), \
-                                                -(__v8df)(__m512d)(C), \
-                                                (int)(R)), \
-            (__v8df)(__m512d)(C))
+  (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
+                                            (__v8df)(__m512d)(B), \
+                                            (__v8df)(__m512d)(C), \
+                                            (__mmask8)(U), (int)(R))

 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_selectpd_512((__mmask16) __U,
-                    (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
-                                                           -(__v8df) __B,
-                                                           -(__v8df) __C,
-                                                           _MM_FROUND_CUR_DIRECTION),
-                    (__v8df) __A);
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+                                                    -(__v8df) __B,
+                                                    -(__v8df) __C,
+                                                    (__mmask8) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
 {
-  return (__m512d) __builtin_ia32_selectpd_512((__mmask16) __U,
-                    (__m512d) __builtin_ia32_vfmaddpd512 ((__v8df) __A,
-                                                           -(__v8df) __B,
-                                                           -(__v8df) __C,
-                                                           _MM_FROUND_CUR_DIRECTION),
-                    (__v8df) __C);
+  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
 }

 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
-  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
-            (__m512)__builtin_ia32_vfmaddps512 ((__v16sf)(__m512)(A), \
-                                                -(__v16sf)(__m512)(B), \
-                                                -(__v16sf)(__m512)(C), \
-                                               (int)(R)), \
-            (__v16sf)(__m512)(A))
+  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+                                          -(__v16sf)(__m512)(B), \
+                                          -(__v16sf)(__m512)(C), \
+                                          (__mmask16)(U), (int)(R))

 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
-  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
-            (__m512)__builtin_ia32_vfmaddps512 ((__v16sf)(__m512)(A), \
-                                                -(__v16sf)(__m512)(B), \
-                                                -(__v16sf)(__m512)(C), \
-                                               (int)(R)), \
-            (__v16sf)(__m512)(C))
+  (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
+                                           (__v16sf)(__m512)(B), \
+                                           (__v16sf)(__m512)(C), \
+                                           (__mmask16)(U), (int)(R))

 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
-                   (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
-                                                        -(__v16sf) __B,
-                                                        -(__v16sf) __C,
-                                                        _MM_FROUND_CUR_DIRECTION),
-                   (__v16sf) __A);
+  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+                                                   -(__v16sf) __B,
+                                                   -(__v16sf) __C,
+                                                   (__mmask16) __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
 {
-  return (__m512) __builtin_ia32_selectps_512((__mmask16) __U,
-                   (__m512) __builtin_ia32_vfmaddps512 ((__v16sf) __A,
-                                                        -(__v16sf) __B,
-                                                        -(__v16sf) __C,
-                                                        _MM_FROUND_CUR_DIRECTION),
-                   (__v16sf) __C);
+  return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
 }
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index b4fdcc0f7e9..44c87316419 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2340,10 +2340,6 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
   case X86::BI__builtin_ia32_sqrtpd512_mask:
   case X86::BI__builtin_ia32_sqrtps512_mask:
-  case X86::BI__builtin_ia32_vfmaddpd512:
-  case X86::BI__builtin_ia32_vfmaddps512:
-  case X86::BI__builtin_ia32_vfmaddsubpd512:
-  case X86::BI__builtin_ia32_vfmaddsubps512:
     ArgNum = 3;
     HasRC = true;
     break;
@@ -2378,6 +2374,22 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_vfmaddss3_mask:
   case X86::BI__builtin_ia32_vfmaddss3_maskz:
   case X86::BI__builtin_ia32_vfmaddss3_mask3:
+  case X86::BI__builtin_ia32_vfmaddpd512_mask:
+  case X86::BI__builtin_ia32_vfmaddpd512_maskz:
+  case X86::BI__builtin_ia32_vfmaddpd512_mask3:
+  case X86::BI__builtin_ia32_vfmsubpd512_mask3:
+  case X86::BI__builtin_ia32_vfmaddps512_mask:
+  case X86::BI__builtin_ia32_vfmaddps512_maskz:
+  case X86::BI__builtin_ia32_vfmaddps512_mask3:
+  case X86::BI__builtin_ia32_vfmsubps512_mask3:
+  case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+  case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+  case X86::BI__builtin_ia32_vfmaddsubps512_mask:
+  case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+  case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+  case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
    ArgNum = 4;
    HasRC = true;
    break;
|
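
For readers following the header change, here is a minimal sketch (an editor's illustration, not part of the patch) of the masking behaviour the new *_mask builtins encode. In the `_mm512_mask_*` forms the masked-off lanes keep the first source operand, which is exactly what the removed `__builtin_ia32_select*_512` wrappers computed; the maskz and mask3 forms instead keep zero and the third operand, respectively. Assumes an AVX-512F target (e.g. compiled with -mavx512f); the input values and the 0x0F mask are illustrative.

/* Sketch: the mask-merging form of fmsubadd should match a blend of the
 * unmasked result with the first operand (assumption: AVX-512F available). */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512d a = _mm512_set1_pd(1.0);
  __m512d b = _mm512_set1_pd(2.0);
  __m512d c = _mm512_set1_pd(0.5);
  __mmask8 u = 0x0F;                 /* only the low four lanes are active */

  /* New-style: the write-mask rides with the FMA operation itself. */
  __m512d masked = _mm512_mask_fmsubadd_pd(a, u, b, c);

  /* Equivalent to the removed expansion: compute unmasked, then merge with a
   * (inactive lanes keep a, active lanes take the FMA result). */
  __m512d full    = _mm512_fmsubadd_pd(a, b, c);
  __m512d blended = _mm512_mask_blend_pd(u, a, full);

  double out_masked[8], out_blended[8];
  _mm512_storeu_pd(out_masked, masked);
  _mm512_storeu_pd(out_blended, blended);
  for (int i = 0; i < 8; ++i)
    printf("lane %d: %f %f\n", i, out_masked[i], out_blended[i]);
  return 0;
}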

