summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGBuiltin.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp94
1 files changed, 94 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d4b11659667..36c811f473b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8427,6 +8427,84 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
return Res;
}
+// Lowers X86 FMA intrinsics to IR.
+static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
+ unsigned BuiltinID) {
+
+ bool IsAddSub = false;
+ bool IsScalar = false;
+
+ // 4 operands always means rounding mode without a mask here.
+ bool IsRound = Ops.size() == 4;
+
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: break;
+ case clang::X86::BI__builtin_ia32_vfmaddss3: IsScalar = true; break;
+ case clang::X86::BI__builtin_ia32_vfmaddsd3: IsScalar = true; break;
+ case clang::X86::BI__builtin_ia32_vfmaddps512:
+ ID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
+ case clang::X86::BI__builtin_ia32_vfmaddpd512:
+ ID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
+ case clang::X86::BI__builtin_ia32_vfmaddsubps: IsAddSub = true; break;
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd: IsAddSub = true; break;
+ case clang::X86::BI__builtin_ia32_vfmaddsubps256: IsAddSub = true; break;
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd256: IsAddSub = true; break;
+ case clang::X86::BI__builtin_ia32_vfmaddsubps512: {
+ ID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
+ IsAddSub = true;
+ break;
+ }
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd512: {
+ ID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
+ IsAddSub = true;
+ break;
+ }
+ }
+
+ // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
+ if (IsRound) {
+ Function *Intr = CGF.CGM.getIntrinsic(ID);
+ if (cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != (uint64_t)4)
+ return CGF.Builder.CreateCall(Intr, Ops);
+ }
+
+ Value *A = Ops[0];
+ Value *B = Ops[1];
+ Value *C = Ops[2];
+
+ if (IsScalar) {
+ A = CGF.Builder.CreateExtractElement(A, (uint64_t)0);
+ B = CGF.Builder.CreateExtractElement(B, (uint64_t)0);
+ C = CGF.Builder.CreateExtractElement(C, (uint64_t)0);
+ }
+
+ llvm::Type *Ty = A->getType();
+ Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Value *Res = CGF.Builder.CreateCall(FMA, {A, B, C} );
+
+ if (IsScalar)
+ return CGF.Builder.CreateInsertElement(Ops[0], Res, (uint64_t)0);
+
+ if (IsAddSub) {
+ // Negate even elts in C using a mask.
+ unsigned NumElts = Ty->getVectorNumElements();
+ SmallVector<Constant *, 16> NMask;
+ Constant *Zero = ConstantInt::get(CGF.Builder.getInt1Ty(), 0);
+ Constant *One = ConstantInt::get(CGF.Builder.getInt1Ty(), 1);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ NMask.push_back(i % 2 == 0 ? One : Zero);
+ }
+ Value *NegMask = ConstantVector::get(NMask);
+
+ Value *NegC = CGF.Builder.CreateFNeg(C);
+ Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
+ Res = CGF.Builder.CreateSelect(NegMask, FMSub, Res);
+ }
+
+ return Res;
+}
+
static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
ArrayRef<Value *> Ops) {
llvm::Type *Ty = Ops[0]->getType();
@@ -8820,6 +8898,22 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cvtq2mask512:
return EmitX86ConvertToMask(*this, Ops[0]);
+ case X86::BI__builtin_ia32_vfmaddss3:
+ case X86::BI__builtin_ia32_vfmaddsd3:
+ case X86::BI__builtin_ia32_vfmaddps:
+ case X86::BI__builtin_ia32_vfmaddpd:
+ case X86::BI__builtin_ia32_vfmaddps256:
+ case X86::BI__builtin_ia32_vfmaddpd256:
+ case X86::BI__builtin_ia32_vfmaddps512:
+ case X86::BI__builtin_ia32_vfmaddpd512:
+ case X86::BI__builtin_ia32_vfmaddsubps:
+ case X86::BI__builtin_ia32_vfmaddsubpd:
+ case X86::BI__builtin_ia32_vfmaddsubps256:
+ case X86::BI__builtin_ia32_vfmaddsubpd256:
+ case X86::BI__builtin_ia32_vfmaddsubps512:
+ case X86::BI__builtin_ia32_vfmaddsubpd512:
+ return EmitX86FMAExpr(*this, Ops, BuiltinID);
+
case X86::BI__builtin_ia32_movdqa32store128_mask:
case X86::BI__builtin_ia32_movdqa64store128_mask:
case X86::BI__builtin_ia32_storeaps128_mask:
OpenPOWER on IntegriCloud