diff options
Diffstat (limited to 'clang/lib/CodeGen')
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 173 |
1 files changed, 112 insertions, 61 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a09fa7aae5e..a086b4d926c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8555,79 +8555,110 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, // Lowers X86 FMA intrinsics to IR. static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops, - unsigned BuiltinID) { + unsigned BuiltinID, bool IsAddSub) { - bool IsAddSub = false; - bool IsScalar = false; - - // 4 operands always means rounding mode without a mask here. - bool IsRound = Ops.size() == 4; - - Intrinsic::ID ID; + bool Subtract = false; + Intrinsic::ID IID = Intrinsic::not_intrinsic; switch (BuiltinID) { default: break; - case clang::X86::BI__builtin_ia32_vfmaddss3: IsScalar = true; break; - case clang::X86::BI__builtin_ia32_vfmaddsd3: IsScalar = true; break; - case clang::X86::BI__builtin_ia32_vfmaddps512: - ID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break; - case clang::X86::BI__builtin_ia32_vfmaddpd512: - ID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break; - case clang::X86::BI__builtin_ia32_vfmaddsubps: IsAddSub = true; break; - case clang::X86::BI__builtin_ia32_vfmaddsubpd: IsAddSub = true; break; - case clang::X86::BI__builtin_ia32_vfmaddsubps256: IsAddSub = true; break; - case clang::X86::BI__builtin_ia32_vfmaddsubpd256: IsAddSub = true; break; - case clang::X86::BI__builtin_ia32_vfmaddsubps512: { - ID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512; - IsAddSub = true; + case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: + Subtract = true; + LLVM_FALLTHROUGH; + case clang::X86::BI__builtin_ia32_vfmaddps512_mask: + case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: + IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break; + case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: + Subtract = true; + LLVM_FALLTHROUGH; + case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: + case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: + IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break; + case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: + Subtract = true; + LLVM_FALLTHROUGH; + case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: + case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: + IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512; break; - } - case clang::X86::BI__builtin_ia32_vfmaddsubpd512: { - ID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512; - IsAddSub = true; + case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: + Subtract = true; + LLVM_FALLTHROUGH; + case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: + case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: + IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512; break; } - } - - // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding). - if (IsRound) { - Function *Intr = CGF.CGM.getIntrinsic(ID); - if (cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != (uint64_t)4) - return CGF.Builder.CreateCall(Intr, Ops); - } Value *A = Ops[0]; Value *B = Ops[1]; Value *C = Ops[2]; - if (IsScalar) { - A = CGF.Builder.CreateExtractElement(A, (uint64_t)0); - B = CGF.Builder.CreateExtractElement(B, (uint64_t)0); - C = CGF.Builder.CreateExtractElement(C, (uint64_t)0); - } + if (Subtract) + C = CGF.Builder.CreateFNeg(C); - llvm::Type *Ty = A->getType(); - Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); - Value *Res = CGF.Builder.CreateCall(FMA, {A, B, C} ); + Value *Res; - if (IsScalar) - return CGF.Builder.CreateInsertElement(Ops[0], Res, (uint64_t)0); + // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding). + if (IID != Intrinsic::not_intrinsic && + cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) { + Function *Intr = CGF.CGM.getIntrinsic(IID); + Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() }); + } else { + llvm::Type *Ty = A->getType(); + Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); + Res = CGF.Builder.CreateCall(FMA, {A, B, C} ); + + if (IsAddSub) { + // Negate even elts in C using a mask. + unsigned NumElts = Ty->getVectorNumElements(); + SmallVector<Constant *, 16> NMask; + Constant *Zero = ConstantInt::get(CGF.Builder.getInt1Ty(), 0); + Constant *One = ConstantInt::get(CGF.Builder.getInt1Ty(), 1); + for (unsigned i = 0; i < NumElts; ++i) { + NMask.push_back(i % 2 == 0 ? One : Zero); + } + Value *NegMask = ConstantVector::get(NMask); - if (IsAddSub) { - // Negate even elts in C using a mask. - unsigned NumElts = Ty->getVectorNumElements(); - SmallVector<Constant *, 16> NMask; - Constant *Zero = ConstantInt::get(CGF.Builder.getInt1Ty(), 0); - Constant *One = ConstantInt::get(CGF.Builder.getInt1Ty(), 1); - for (unsigned i = 0; i < NumElts; ++i) { - NMask.push_back(i % 2 == 0 ? One : Zero); + Value *NegC = CGF.Builder.CreateFNeg(C); + Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} ); + Res = CGF.Builder.CreateSelect(NegMask, FMSub, Res); } - Value *NegMask = ConstantVector::get(NMask); + } - Value *NegC = CGF.Builder.CreateFNeg(C); - Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} ); - Res = CGF.Builder.CreateSelect(NegMask, FMSub, Res); + // Handle any required masking. + Value *MaskFalseVal = nullptr; + switch (BuiltinID) { + case clang::X86::BI__builtin_ia32_vfmaddps512_mask: + case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: + case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: + case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: + MaskFalseVal = Ops[0]; + break; + case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: + case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: + MaskFalseVal = Constant::getNullValue(Ops[0]->getType()); + break; + case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: + case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: + case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: + case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: + case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: + case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: + case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: + case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: + MaskFalseVal = Ops[2]; + break; } + if (MaskFalseVal) + return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal); + return Res; } @@ -9046,20 +9077,40 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86ConvertToMask(*this, Ops[0]); case X86::BI__builtin_ia32_vfmaddss3: - case X86::BI__builtin_ia32_vfmaddsd3: + case X86::BI__builtin_ia32_vfmaddsd3: { + Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0); + Value *B = Builder.CreateExtractElement(Ops[1], (uint64_t)0); + Value *C = Builder.CreateExtractElement(Ops[2], (uint64_t)0); + Function *FMA = CGM.getIntrinsic(Intrinsic::fma, A->getType()); + Value *Res = Builder.CreateCall(FMA, {A, B, C} ); + return Builder.CreateInsertElement(Ops[0], Res, (uint64_t)0); + } case X86::BI__builtin_ia32_vfmaddps: case X86::BI__builtin_ia32_vfmaddpd: case X86::BI__builtin_ia32_vfmaddps256: case X86::BI__builtin_ia32_vfmaddpd256: - case X86::BI__builtin_ia32_vfmaddps512: - case X86::BI__builtin_ia32_vfmaddpd512: + case X86::BI__builtin_ia32_vfmaddps512_mask: + case X86::BI__builtin_ia32_vfmaddps512_maskz: + case X86::BI__builtin_ia32_vfmaddps512_mask3: + case X86::BI__builtin_ia32_vfmsubps512_mask3: + case X86::BI__builtin_ia32_vfmaddpd512_mask: + case X86::BI__builtin_ia32_vfmaddpd512_maskz: + case X86::BI__builtin_ia32_vfmaddpd512_mask3: + case X86::BI__builtin_ia32_vfmsubpd512_mask3: + return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false); case X86::BI__builtin_ia32_vfmaddsubps: case X86::BI__builtin_ia32_vfmaddsubpd: case X86::BI__builtin_ia32_vfmaddsubps256: case X86::BI__builtin_ia32_vfmaddsubpd256: - case X86::BI__builtin_ia32_vfmaddsubps512: - case X86::BI__builtin_ia32_vfmaddsubpd512: - return EmitX86FMAExpr(*this, Ops, BuiltinID); + case X86::BI__builtin_ia32_vfmaddsubps512_mask: + case X86::BI__builtin_ia32_vfmaddsubps512_maskz: + case X86::BI__builtin_ia32_vfmaddsubps512_mask3: + case X86::BI__builtin_ia32_vfmsubaddps512_mask3: + case X86::BI__builtin_ia32_vfmaddsubpd512_mask: + case X86::BI__builtin_ia32_vfmaddsubpd512_maskz: + case X86::BI__builtin_ia32_vfmaddsubpd512_mask3: + case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: + return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true); case X86::BI__builtin_ia32_movdqa32store128_mask: case X86::BI__builtin_ia32_movdqa64store128_mask: |

