Diffstat (limited to 'clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp | 258
1 file changed, 147 insertions(+), 111 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2fe4167a888..52e40db5402 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1748,209 +1748,245 @@ CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) {
 static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
                                            unsigned BuiltinID,
                                            const CallExpr *E) {
-  NeonTypeFlags::EltType ET;
-  bool usgn;
   unsigned int Int = 0;
-  bool OverloadInt = true;
+  // Scalar result generated across vectors
+  bool AcrossVec = false;
+  // Extend element of one-element vector
+  bool ExtendEle = false;
+  bool OverloadInt = false;
   const char *s = NULL;
-  SmallVector<Value *, 4> Ops;
-  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
-  }
-
   // AArch64 scalar builtins are not overloaded, they do not have an extra
   // argument that specifies the vector type, need to handle each case.
   switch (BuiltinID) {
   default: break;
   // Scalar Add
   case AArch64::BI__builtin_neon_vaddd_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vaddds;
-    s = "vaddds"; usgn = false; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vaddds;
+    s = "vaddds"; break;
   case AArch64::BI__builtin_neon_vaddd_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vadddu;
-    s = "vadddu"; usgn = true; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vadddu;
+    s = "vadddu"; break;
   // Scalar Sub
   case AArch64::BI__builtin_neon_vsubd_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vsubds;
-    s = "vsubds"; usgn = false; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vsubds;
+    s = "vsubds"; break;
   case AArch64::BI__builtin_neon_vsubd_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vsubdu;
-    s = "vsubdu"; usgn = true; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vsubdu;
+    s = "vsubdu"; break;
   // Scalar Saturating Add
   case AArch64::BI__builtin_neon_vqaddb_s8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqadds;
-    s = "vqadds"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddh_s16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqadds;
-    s = "vqadds"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqadds_s32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqadds;
-    s = "vqadds"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddd_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqadds;
-    s = "vqadds"; usgn = false; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqadds;
+    s = "vqadds"; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddb_u8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqaddu;
-    s = "vqaddu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddh_u16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqaddu;
-    s = "vqaddu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqadds_u32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqaddu;
-    s = "vqaddu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqaddd_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqaddu;
-    s = "vqaddu"; usgn = true; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqaddu;
+    s = "vqaddu"; OverloadInt = true; break;
   // Scalar Saturating Sub
   case AArch64::BI__builtin_neon_vqsubb_s8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqsubs;
-    s = "vqsubs"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubh_s16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqsubs;
-    s = "vqsubs"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubs_s32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqsubs;
-    s = "vqsubs"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubd_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqsubs;
-    s = "vqsubs"; usgn = false; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqsubs;
+    s = "vqsubs"; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubb_u8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqsubu;
-    s = "vqsubu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubh_u16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqsubu;
-    s = "vqsubu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubs_u32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqsubu;
-    s = "vqsubu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqsubd_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqsubu;
-    s = "vqsubu"; usgn = true; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqsubu;
+    s = "vqsubu"; OverloadInt = true; break;
   // Scalar Shift Left
   case AArch64::BI__builtin_neon_vshld_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vshlds;
-    s = "vshlds"; usgn = false; OverloadInt=false; break;
+    Int = Intrinsic::aarch64_neon_vshlds;
+    s = "vshlds"; break;
   case AArch64::BI__builtin_neon_vshld_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vshldu;
-    s = "vshldu"; usgn = true; OverloadInt = false; break;
+    Int = Intrinsic::aarch64_neon_vshldu;
+    s = "vshldu"; break;
   // Scalar Saturating Shift Left
   case AArch64::BI__builtin_neon_vqshlb_s8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqshls;
-    s = "vqshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshlh_s16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqshls;
-    s = "vqshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshls_s32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqshls;
-    s = "vqshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshld_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqshls;
-    s = "vqshls"; usgn = false; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqshls;
+    s = "vqshls"; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshlb_u8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqshlu;
-    s = "vqshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshlh_u16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqshlu;
-    s = "vqshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshls_u32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqshlu;
-    s = "vqshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqshld_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqshlu;
-    s = "vqshlu"; usgn = true; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqshlu;
+    s = "vqshlu"; OverloadInt = true; break;
   // Scalar Rouding Shift Left
   case AArch64::BI__builtin_neon_vrshld_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vrshlds;
-    s = "vrshlds"; usgn = false; OverloadInt=false; break;
+    Int = Intrinsic::aarch64_neon_vrshlds;
+    s = "vrshlds"; break;
   case AArch64::BI__builtin_neon_vrshld_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vrshldu;
-    s = "vrshldu"; usgn = true; OverloadInt=false; break;
+    Int = Intrinsic::aarch64_neon_vrshldu;
+    s = "vrshldu"; break;
   // Scalar Saturating Rouding Shift Left
   case AArch64::BI__builtin_neon_vqrshlb_s8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqrshls;
-    s = "vqrshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshlh_s16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqrshls;
-    s = "vqrshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshls_s32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqrshls;
-    s = "vqrshls"; usgn = false; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshld_s64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqrshls;
-    s = "vqrshls"; usgn = false; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqrshls;
+    s = "vqrshls"; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshlb_u8:
-    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqrshlu;
-    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshlh_u16:
-    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqrshlu;
-    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshls_u32:
-    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqrshlu;
-    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
   case AArch64::BI__builtin_neon_vqrshld_u64:
-    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqrshlu;
-    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
+    Int = Intrinsic::aarch64_neon_vqrshlu;
+    s = "vqrshlu"; OverloadInt = true; break;
   // Scalar Reduce Pairwise Add
   case AArch64::BI__builtin_neon_vpaddd_s64:
     Int = Intrinsic::aarch64_neon_vpadd; s = "vpadd";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpadds_f32:
     Int = Intrinsic::aarch64_neon_vpfadd; s = "vpfadd";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpaddd_f64:
     Int = Intrinsic::aarch64_neon_vpfaddq; s = "vpfaddq";
-    OverloadInt = false; break;
+    break;
   // Scalar Reduce Pairwise Floating Point Max
   case AArch64::BI__builtin_neon_vpmaxs_f32:
     Int = Intrinsic::aarch64_neon_vpmax; s = "vpmax";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpmaxqd_f64:
     Int = Intrinsic::aarch64_neon_vpmaxq; s = "vpmaxq";
-    OverloadInt = false; break;
+    break;
   // Scalar Reduce Pairwise Floating Point Min
   case AArch64::BI__builtin_neon_vpmins_f32:
     Int = Intrinsic::aarch64_neon_vpmin; s = "vpmin";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpminqd_f64:
     Int = Intrinsic::aarch64_neon_vpminq; s = "vpminq";
-    OverloadInt = false; break;
+    break;
   // Scalar Reduce Pairwise Floating Point Maxnm
   case AArch64::BI__builtin_neon_vpmaxnms_f32:
     Int = Intrinsic::aarch64_neon_vpfmaxnm; s = "vpfmaxnm";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpmaxnmqd_f64:
     Int = Intrinsic::aarch64_neon_vpfmaxnmq; s = "vpfmaxnmq";
-    OverloadInt = false; break;
-  // Scalar Reduce Pairwise Floating Point Minnm
+    break;
+  // Scalar Reduce Pairwise Floating Point Minnm
   case AArch64::BI__builtin_neon_vpminnms_f32:
     Int = Intrinsic::aarch64_neon_vpfminnm; s = "vpfminnm";
-    OverloadInt = false; break;
+    break;
   case AArch64::BI__builtin_neon_vpminnmqd_f64:
     Int = Intrinsic::aarch64_neon_vpfminnmq; s = "vpfminnmq";
-    OverloadInt = false; break;
+    break;
+  // The followings are intrinsics with scalar results generated AcrossVec vectors
+  case AArch64::BI__builtin_neon_vaddlv_s8:
+  case AArch64::BI__builtin_neon_vaddlv_s16:
+  case AArch64::BI__builtin_neon_vaddlvq_s8:
+  case AArch64::BI__builtin_neon_vaddlvq_s16:
+  case AArch64::BI__builtin_neon_vaddlvq_s32:
+    Int = Intrinsic::aarch64_neon_saddlv;
+    AcrossVec = true; ExtendEle = true; s = "saddlv"; break;
+  case AArch64::BI__builtin_neon_vaddlv_u8:
+  case AArch64::BI__builtin_neon_vaddlv_u16:
+  case AArch64::BI__builtin_neon_vaddlvq_u8:
+  case AArch64::BI__builtin_neon_vaddlvq_u16:
+  case AArch64::BI__builtin_neon_vaddlvq_u32:
+    Int = Intrinsic::aarch64_neon_uaddlv;
+    AcrossVec = true; ExtendEle = true; s = "uaddlv"; break;
+  case AArch64::BI__builtin_neon_vmaxv_s8:
+  case AArch64::BI__builtin_neon_vmaxv_s16:
+  case AArch64::BI__builtin_neon_vmaxvq_s8:
+  case AArch64::BI__builtin_neon_vmaxvq_s16:
+  case AArch64::BI__builtin_neon_vmaxvq_s32:
+    Int = Intrinsic::aarch64_neon_smaxv;
+    AcrossVec = true; ExtendEle = false; s = "smaxv"; break;
+  case AArch64::BI__builtin_neon_vmaxv_u8:
+  case AArch64::BI__builtin_neon_vmaxv_u16:
+  case AArch64::BI__builtin_neon_vmaxvq_u8:
+  case AArch64::BI__builtin_neon_vmaxvq_u16:
+  case AArch64::BI__builtin_neon_vmaxvq_u32:
+    Int = Intrinsic::aarch64_neon_umaxv;
+    AcrossVec = true; ExtendEle = false; s = "umaxv"; break;
+  case AArch64::BI__builtin_neon_vminv_s8:
+  case AArch64::BI__builtin_neon_vminv_s16:
+  case AArch64::BI__builtin_neon_vminvq_s8:
+  case AArch64::BI__builtin_neon_vminvq_s16:
+  case AArch64::BI__builtin_neon_vminvq_s32:
+    Int = Intrinsic::aarch64_neon_sminv;
+    AcrossVec = true; ExtendEle = false; s = "sminv"; break;
+  case AArch64::BI__builtin_neon_vminv_u8:
+  case AArch64::BI__builtin_neon_vminv_u16:
+  case AArch64::BI__builtin_neon_vminvq_u8:
+  case AArch64::BI__builtin_neon_vminvq_u16:
+  case AArch64::BI__builtin_neon_vminvq_u32:
+    Int = Intrinsic::aarch64_neon_uminv;
+    AcrossVec = true; ExtendEle = false; s = "uminv"; break;
+  case AArch64::BI__builtin_neon_vaddv_s8:
+  case AArch64::BI__builtin_neon_vaddv_s16:
+  case AArch64::BI__builtin_neon_vaddvq_s8:
+  case AArch64::BI__builtin_neon_vaddvq_s16:
+  case AArch64::BI__builtin_neon_vaddvq_s32:
+  case AArch64::BI__builtin_neon_vaddv_u8:
+  case AArch64::BI__builtin_neon_vaddv_u16:
+  case AArch64::BI__builtin_neon_vaddvq_u8:
+  case AArch64::BI__builtin_neon_vaddvq_u16:
+  case AArch64::BI__builtin_neon_vaddvq_u32:
+    Int = Intrinsic::aarch64_neon_vaddv;
+    AcrossVec = true; ExtendEle = false; s = "vaddv"; break;
+  case AArch64::BI__builtin_neon_vmaxvq_f32:
+    Int = Intrinsic::aarch64_neon_vmaxv;
+    AcrossVec = true; ExtendEle = false; s = "vmaxv"; break;
+  case AArch64::BI__builtin_neon_vminvq_f32:
+    Int = Intrinsic::aarch64_neon_vminv;
+    AcrossVec = true; ExtendEle = false; s = "vminv"; break;
+  case AArch64::BI__builtin_neon_vmaxnmvq_f32:
+    Int = Intrinsic::aarch64_neon_vmaxnmv;
+    AcrossVec = true; ExtendEle = false; s = "vmaxnmv"; break;
+  case AArch64::BI__builtin_neon_vminnmvq_f32:
+    Int = Intrinsic::aarch64_neon_vminnmv;
+    AcrossVec = true; ExtendEle = false; s = "vminnmv"; break;
   }
 
   if (!Int)
     return 0;
 
   // AArch64 scalar builtin that returns scalar type
-  // and should be mapped to AArch64 intrinsic that takes
-  // one-element vector type arguments and returns
+  // and should be mapped to AArch64 intrinsic that returns
   // one-element vector type.
-  llvm::Type *Ty = 0;
   Function *F = 0;
-  if (OverloadInt) {
+  SmallVector<Value *, 4> Ops;
+  if (AcrossVec) {
+    // Gen arg type
+    const Expr *Arg = E->getArg(E->getNumArgs()-1);
+    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
+    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
+    llvm::Type *ETy = VTy->getElementType();
+    llvm::VectorType *RTy = llvm::VectorType::get(ETy, 1);
+
+    if (ExtendEle) {
+      assert(!ETy->isFloatingPointTy());
+      RTy = llvm::VectorType::getExtendedElementVectorType(RTy);
+    }
+
+    llvm::Type *Tys[2] = {RTy, VTy};
+    F = CGF.CGM.getIntrinsic(Int, Tys);
+    assert(E->getNumArgs() == 1);
+  }
+  else if (OverloadInt) {
     // Determine the type of this overloaded AArch64 intrinsic
-    NeonTypeFlags Type(ET, usgn, false);
-    llvm::VectorType *VTy = GetNeonType(&CGF, Type, true);
-    Ty = VTy;
-    if (!Ty)
-      return 0;
-    F = CGF.CGM.getIntrinsic(Int, Ty);
+    const Expr *Arg = E->getArg(E->getNumArgs()-1);
+    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
+    llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
+    assert(VTy);
+
+    F = CGF.CGM.getIntrinsic(Int, VTy);
   } else
     F = CGF.CGM.getIntrinsic(Int);
+  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
+  }
+
   Value *Result = CGF.EmitNeonCall(F, Ops, s);
   llvm::Type *ResultType = CGF.ConvertType(E->getType());
   // AArch64 intrinsic one-element vector type cast to
@@ -1959,7 +1995,7 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
 }
 
 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
-                                                const CallExpr *E) {
+                                               const CallExpr *E) {
   // Process AArch64 scalar builtins
   if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E))
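
For reference, the across-vector builtins handled by the new AcrossVec path back the arm_neon.h reduction intrinsics. A minimal usage sketch, not taken from this commit and assuming the usual ACLE signatures on an AArch64 target:

// Illustrative only: exercises two of the across-vector builtins added above.
#include <arm_neon.h>

// Widening add across all 8 lanes: goes through __builtin_neon_vaddlvq_s16,
// i.e. the ExtendEle/saddlv case; the i32 result comes back from the
// intrinsic as a one-element vector and is cast to the scalar type.
int32_t sum_lanes(int16x8_t v) { return vaddlvq_s16(v); }

// Unsigned max across lanes: goes through __builtin_neon_vmaxv_u8 (umaxv case).
uint8_t max_lane(uint8x8_t v) { return vmaxv_u8(v); }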