diff options
Diffstat (limited to 'clang/lib/CodeGen')
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 173 | 
1 file changed, 112 insertions, 61 deletions
| diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a09fa7aae5e..a086b4d926c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8555,79 +8555,110 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,  // Lowers X86 FMA intrinsics to IR.  static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops, -                             unsigned BuiltinID) { +                             unsigned BuiltinID, bool IsAddSub) { -  bool IsAddSub = false; -  bool IsScalar = false; - -  // 4 operands always means rounding mode without a mask here. -  bool IsRound = Ops.size() == 4; - -  Intrinsic::ID ID; +  bool Subtract = false; +  Intrinsic::ID IID = Intrinsic::not_intrinsic;    switch (BuiltinID) {    default: break; -  case clang::X86::BI__builtin_ia32_vfmaddss3: IsScalar = true; break; -  case clang::X86::BI__builtin_ia32_vfmaddsd3: IsScalar = true; break; -  case clang::X86::BI__builtin_ia32_vfmaddps512: -    ID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break; -  case clang::X86::BI__builtin_ia32_vfmaddpd512: -    ID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break; -  case clang::X86::BI__builtin_ia32_vfmaddsubps: IsAddSub = true; break; -  case clang::X86::BI__builtin_ia32_vfmaddsubpd: IsAddSub = true; break; -  case clang::X86::BI__builtin_ia32_vfmaddsubps256: IsAddSub = true; break; -  case clang::X86::BI__builtin_ia32_vfmaddsubpd256: IsAddSub = true; break; -  case clang::X86::BI__builtin_ia32_vfmaddsubps512: { -    ID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512; -    IsAddSub = true; +  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: +    Subtract = true; +    LLVM_FALLTHROUGH; +  case clang::X86::BI__builtin_ia32_vfmaddps512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: +    IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break; +  case 
clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: +    Subtract = true; +    LLVM_FALLTHROUGH; +  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: +    IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break; +  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: +    Subtract = true; +    LLVM_FALLTHROUGH; +  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: +    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;      break; -  } -  case clang::X86::BI__builtin_ia32_vfmaddsubpd512: { -    ID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512; -    IsAddSub = true; +  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: +    Subtract = true; +    LLVM_FALLTHROUGH; +  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: +    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;      break;    } -  } - -  // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding). 
-  if (IsRound) { -    Function *Intr = CGF.CGM.getIntrinsic(ID); -    if (cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != (uint64_t)4) -      return CGF.Builder.CreateCall(Intr, Ops); -  }    Value *A = Ops[0];    Value *B = Ops[1];    Value *C = Ops[2]; -  if (IsScalar) { -    A = CGF.Builder.CreateExtractElement(A, (uint64_t)0); -    B = CGF.Builder.CreateExtractElement(B, (uint64_t)0); -    C = CGF.Builder.CreateExtractElement(C, (uint64_t)0); -  } +  if (Subtract) +    C = CGF.Builder.CreateFNeg(C); -  llvm::Type *Ty = A->getType(); -  Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); -  Value *Res = CGF.Builder.CreateCall(FMA, {A, B, C} ); +  Value *Res; -  if (IsScalar) -    return CGF.Builder.CreateInsertElement(Ops[0], Res, (uint64_t)0); +  // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding). +  if (IID != Intrinsic::not_intrinsic && +      cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) { +    Function *Intr = CGF.CGM.getIntrinsic(IID); +    Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() }); +  } else { +    llvm::Type *Ty = A->getType(); +    Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); +    Res = CGF.Builder.CreateCall(FMA, {A, B, C} ); + +    if (IsAddSub) { +      // Negate even elts in C using a mask. +      unsigned NumElts = Ty->getVectorNumElements(); +      SmallVector<Constant *, 16> NMask; +      Constant *Zero = ConstantInt::get(CGF.Builder.getInt1Ty(), 0); +      Constant *One = ConstantInt::get(CGF.Builder.getInt1Ty(), 1); +      for (unsigned i = 0; i < NumElts; ++i) { +        NMask.push_back(i % 2 == 0 ? One : Zero); +      } +      Value *NegMask = ConstantVector::get(NMask); -  if (IsAddSub) { -    // Negate even elts in C using a mask. 
-    unsigned NumElts = Ty->getVectorNumElements(); -    SmallVector<Constant *, 16> NMask; -    Constant *Zero = ConstantInt::get(CGF.Builder.getInt1Ty(), 0); -    Constant *One = ConstantInt::get(CGF.Builder.getInt1Ty(), 1); -    for (unsigned i = 0; i < NumElts; ++i) { -      NMask.push_back(i % 2 == 0 ? One : Zero); +      Value *NegC = CGF.Builder.CreateFNeg(C); +      Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} ); +      Res = CGF.Builder.CreateSelect(NegMask, FMSub, Res);      } -    Value *NegMask = ConstantVector::get(NMask); +  } -    Value *NegC = CGF.Builder.CreateFNeg(C); -    Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} ); -    Res = CGF.Builder.CreateSelect(NegMask, FMSub, Res); +  // Handle any required masking. +  Value *MaskFalseVal = nullptr; +  switch (BuiltinID) { +  case clang::X86::BI__builtin_ia32_vfmaddps512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: +  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: +    MaskFalseVal = Ops[0]; +    break; +  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: +  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: +    MaskFalseVal = Constant::getNullValue(Ops[0]->getType()); +    break; +  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: +  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: +  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: +  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: +  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: +  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: +  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: +  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: +    MaskFalseVal = Ops[2]; +    break;    } +  if (MaskFalseVal) +    return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal); +    return Res;  } @@ 
-9046,20 +9077,40 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,      return EmitX86ConvertToMask(*this, Ops[0]);    case X86::BI__builtin_ia32_vfmaddss3: -  case X86::BI__builtin_ia32_vfmaddsd3: +  case X86::BI__builtin_ia32_vfmaddsd3: { +    Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0); +    Value *B = Builder.CreateExtractElement(Ops[1], (uint64_t)0); +    Value *C = Builder.CreateExtractElement(Ops[2], (uint64_t)0); +    Function *FMA = CGM.getIntrinsic(Intrinsic::fma, A->getType()); +    Value *Res = Builder.CreateCall(FMA, {A, B, C} ); +    return Builder.CreateInsertElement(Ops[0], Res, (uint64_t)0); +  }    case X86::BI__builtin_ia32_vfmaddps:    case X86::BI__builtin_ia32_vfmaddpd:    case X86::BI__builtin_ia32_vfmaddps256:    case X86::BI__builtin_ia32_vfmaddpd256: -  case X86::BI__builtin_ia32_vfmaddps512: -  case X86::BI__builtin_ia32_vfmaddpd512: +  case X86::BI__builtin_ia32_vfmaddps512_mask: +  case X86::BI__builtin_ia32_vfmaddps512_maskz: +  case X86::BI__builtin_ia32_vfmaddps512_mask3: +  case X86::BI__builtin_ia32_vfmsubps512_mask3: +  case X86::BI__builtin_ia32_vfmaddpd512_mask: +  case X86::BI__builtin_ia32_vfmaddpd512_maskz: +  case X86::BI__builtin_ia32_vfmaddpd512_mask3: +  case X86::BI__builtin_ia32_vfmsubpd512_mask3: +    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);    case X86::BI__builtin_ia32_vfmaddsubps:    case X86::BI__builtin_ia32_vfmaddsubpd:    case X86::BI__builtin_ia32_vfmaddsubps256:    case X86::BI__builtin_ia32_vfmaddsubpd256: -  case X86::BI__builtin_ia32_vfmaddsubps512: -  case X86::BI__builtin_ia32_vfmaddsubpd512: -    return EmitX86FMAExpr(*this, Ops, BuiltinID); +  case X86::BI__builtin_ia32_vfmaddsubps512_mask: +  case X86::BI__builtin_ia32_vfmaddsubps512_maskz: +  case X86::BI__builtin_ia32_vfmaddsubps512_mask3: +  case X86::BI__builtin_ia32_vfmsubaddps512_mask3: +  case X86::BI__builtin_ia32_vfmaddsubpd512_mask: +  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz: + 
 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3: +  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: +    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);    case X86::BI__builtin_ia32_movdqa32store128_mask:    case X86::BI__builtin_ia32_movdqa64store128_mask: | 

