summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGBuiltin.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp75
1 files changed, 58 insertions, 17 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3ebf584ac8e..a0afb086536 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8718,6 +8718,47 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
return Res;
}
+static Value *
+EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
+ Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0,
+ bool NegAcc = false) {
+ unsigned Rnd = 4;
+ if (Ops.size() > 4)
+ Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
+
+ if (NegAcc)
+ Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
+
+ Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
+ Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
+ Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
+ Value *Res;
+ if (Rnd != 4) {
+ Intrinsic::ID IID = Ops[0]->getType()->getPrimitiveSizeInBits() == 32 ?
+ Intrinsic::x86_avx512_vfmadd_f32 :
+ Intrinsic::x86_avx512_vfmadd_f64;
+ Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
+ {Ops[0], Ops[1], Ops[2], Ops[4]});
+ } else {
+ Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
+ Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
+ }
+ // If we have more than 3 arguments, we need to do masking.
+ if (Ops.size() > 3) {
+ Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
+ : Ops[PTIdx];
+
+ // If we negated the accumulator and the its the PassThru value we need to
+ // bypass the negate. Conveniently Upper should be the same thing in this
+ // case.
+ if (NegAcc && PTIdx == 2)
+ PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
+
+ Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
+ }
+ return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
+}
+
static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
ArrayRef<Value *> Ops) {
llvm::Type *Ty = Ops[0]->getType();
@@ -9141,24 +9182,24 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86ConvertToMask(*this, Ops[0]);
case X86::BI__builtin_ia32_vfmaddss3:
- case X86::BI__builtin_ia32_vfmaddsd3: {
- Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
- Value *B = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
- Value *C = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
- Function *FMA = CGM.getIntrinsic(Intrinsic::fma, A->getType());
- Value *Res = Builder.CreateCall(FMA, {A, B, C} );
- return Builder.CreateInsertElement(Ops[0], Res, (uint64_t)0);
- }
+ case X86::BI__builtin_ia32_vfmaddsd3:
+ case X86::BI__builtin_ia32_vfmaddss3_mask:
+ case X86::BI__builtin_ia32_vfmaddsd3_mask:
+ return EmitScalarFMAExpr(*this, Ops, Ops[0]);
case X86::BI__builtin_ia32_vfmaddss:
- case X86::BI__builtin_ia32_vfmaddsd: {
- Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
- Value *B = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
- Value *C = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
- Function *FMA = CGM.getIntrinsic(Intrinsic::fma, A->getType());
- Value *Res = Builder.CreateCall(FMA, {A, B, C} );
- Value *Zero = Constant::getNullValue(Ops[0]->getType());
- return Builder.CreateInsertElement(Zero, Res, (uint64_t)0);
- }
+ case X86::BI__builtin_ia32_vfmaddsd:
+ return EmitScalarFMAExpr(*this, Ops,
+ Constant::getNullValue(Ops[0]->getType()));
+ case X86::BI__builtin_ia32_vfmaddss3_maskz:
+ case X86::BI__builtin_ia32_vfmaddsd3_maskz:
+ return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
+ case X86::BI__builtin_ia32_vfmaddss3_mask3:
+ case X86::BI__builtin_ia32_vfmaddsd3_mask3:
+ return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
+ case X86::BI__builtin_ia32_vfmsubss3_mask3:
+ case X86::BI__builtin_ia32_vfmsubsd3_mask3:
+ return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
+ /*NegAcc*/true);
case X86::BI__builtin_ia32_vfmaddps:
case X86::BI__builtin_ia32_vfmaddpd:
case X86::BI__builtin_ia32_vfmaddps256:
OpenPOWER on IntegriCloud