author     Simon Tatham <simon.tatham@arm.com>   2019-09-02 15:50:50 +0100
committer  Simon Tatham <simon.tatham@arm.com>   2019-10-24 16:33:13 +0100
commit     08074cc96557dcb7ec91d7cd84c412414fa9a516 (patch)
tree       11371e0a7950d1d4b5dc0e2d4476e1ba91b4672d /clang/lib/CodeGen/CGBuiltin.cpp
parent     7c11da0cfd3396150c13657a2217f2044f314734 (diff)
[clang,ARM] Initial ACLE intrinsics for MVE.
This commit sets up the infrastructure for auto-generating <arm_mve.h>
and doing clang-side code generation for the builtins it relies on,
and demonstrates that it works by implementing a representative sample
of the ACLE intrinsics, more or less matching the ones introduced in
LLVM IR by D67158, D68699 and D68700.
Like NEON, that header file will provide a set of vector types like
uint16x8_t and C functions with names like vaddq_u32(). Unlike NEON,
the ACLE spec for <arm_mve.h> includes a polymorphism system, so that
you can write plain vaddq() and disambiguate by the vector types you
pass to it.
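For illustration, here is a minimal usage sketch of that shape. vaddq_u32
and the polymorphic vaddq are the intrinsics mentioned above, and
uint32x4_t is the matching ACLE vector type; compiling it of course
needs a clang target with MVE enabled.

    #include <arm_mve.h>

    /* Explicitly typed form: the suffix names the element type. */
    uint32x4_t add_explicit(uint32x4_t a, uint32x4_t b) {
      return vaddq_u32(a, b);
    }

    /* Polymorphic form: the same operation, disambiguated purely by
       the argument types. */
    uint32x4_t add_polymorphic(uint32x4_t a, uint32x4_t b) {
      return vaddq(a, b);
    }
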
Unlike the corresponding NEON code, I've arranged to make every user-
facing ACLE intrinsic into a clang builtin, and implement all the code
generation inside clang. So <arm_mve.h> itself contains nothing but
typedefs and function declarations, with the latter all using the new
`__attribute__((__clang_builtin))` system to arrange that the user-
facing function names correspond to the right internal BuiltinIDs.
So the new MveEmitter tablegen system specifies the full sequence of
IRBuilder operations that each user-facing ACLE intrinsic should
translate into. Where possible, the ACLE intrinsics map to standard IR
operations such as vector-typed `add` and `fadd`; where no standard
representation exists, I call down to the sample IR intrinsics
introduced in an earlier commit.
Doing it like this means that you get the polymorphism for free just
by using __attribute__((overloadable)): the clang overload resolution
decides which function declaration is the relevant one, and _then_ its
BuiltinID is looked up, so by the time we're doing code generation,
that's all been resolved by the standard system. It also means that
you get really nice error messages if the user passes the wrong
combination of types: clang will show the declarations from the header
file and explain why each one doesn't match.
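As a rough sketch (not the generated header verbatim), the polymorphism
boils down to a set of ordinary declarations sharing one name:

    /* Illustrative only: the exact prototypes are assumptions, and the
       real header additionally attaches the builtin-alias attribute
       described above so that each declaration maps to its own
       BuiltinID. */
    __attribute__((overloadable))
    uint32x4_t vaddq(uint32x4_t a, uint32x4_t b);
    __attribute__((overloadable))
    uint16x8_t vaddq(uint16x8_t a, uint16x8_t b);

So a call like vaddq(a, b) with uint16x8_t arguments resolves to the
second declaration before any builtin lookup happens.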
(The obvious alternative approach would be to have wrapper functions
in <arm_mve.h> which pass their arguments to the underlying builtins.
But that doesn't work in the case where one of the arguments has to be
a constant integer: the wrapper function can't pass the constantness
through. So you'd have to do that case using a macro instead, and then
use C11 `_Generic` to handle the polymorphism. Then you have to add
horrible workarounds because `_Generic` requires even the untaken
branches to type-check successfully, and _then_ if the user gets the
types wrong, the error message is totally unreadable!)
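For concreteness, a hedged sketch of that rejected approach, assuming
typed wrapper functions vaddq_u32 and vaddq_u16 exist for the macro to
dispatch to:

    /* C11 _Generic dispatch on the type of the first argument. Every
       branch has to be a valid expression even when untaken, and a
       wrong argument type produces one opaque error about the
       controlling expression rather than a list of candidate
       declarations. */
    #define vaddq(a, b)                     \
      _Generic((a),                         \
               uint32x4_t: vaddq_u32,       \
               uint16x8_t: vaddq_u16)(a, b)
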
Reviewers: dmgreen, miyuki, ostannard
Subscribers: mgorny, javed.absar, kristof.beyls, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D67161
Diffstat (limited to 'clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp  120
1 file changed, 115 insertions, 5 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f9871b23314..c35d0a88e76 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4218,7 +4218,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   }
 
   // See if we have a target specific builtin that needs to be lowered.
-  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
+  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue))
     return RValue::get(V);
 
   ErrorUnsupported(E, "builtin function");
@@ -4229,13 +4229,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
                                         unsigned BuiltinID,
                                         const CallExpr *E,
+                                        ReturnValueSlot ReturnValue,
                                         llvm::Triple::ArchType Arch) {
   switch (Arch) {
   case llvm::Triple::arm:
   case llvm::Triple::armeb:
   case llvm::Triple::thumb:
   case llvm::Triple::thumbeb:
-    return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
+    return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
   case llvm::Triple::aarch64:
   case llvm::Triple::aarch64_be:
     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
@@ -4268,15 +4269,16 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
 }
 
 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
-                                              const CallExpr *E) {
+                                              const CallExpr *E,
+                                              ReturnValueSlot ReturnValue) {
   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
     return EmitTargetArchBuiltinExpr(
         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
-        getContext().getAuxTargetInfo()->getTriple().getArch());
+        ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
   }
 
-  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
+  return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
                                    getTarget().getTriple().getArch());
 }
 
@@ -6004,6 +6006,7 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) {
 
 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                            const CallExpr *E,
+                                           ReturnValueSlot ReturnValue,
                                            llvm::Triple::ArchType Arch) {
   if (auto Hint = GetValueForARMHint(BuiltinID))
     return Hint;
@@ -6320,6 +6323,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
   }
 
+  // Deal with MVE builtins
+  if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
+    return Result;
+
   // Find out if any arguments are required to be integer constant
   // expressions.
   unsigned ICEArguments = 0;
@@ -6769,6 +6776,109 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   }
 }
 
+Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
+                                              const CallExpr *E,
+                                              ReturnValueSlot ReturnValue,
+                                              llvm::Triple::ArchType Arch) {
+  enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
+  Intrinsic::ID IRIntr;
+  unsigned NumVectors;
+
+  // Code autogenerated by Tablegen will handle all the simple builtins.
+  switch (BuiltinID) {
+    #include "clang/Basic/arm_mve_builtin_cg.inc"
+
+    // If we didn't match an MVE builtin id at all, go back to the
+    // main EmitARMBuiltinExpr.
+  default:
+    return nullptr;
+  }
+
+  // Anything that breaks from that switch is an MVE builtin that
+  // needs handwritten code to generate.
+
+  switch (CustomCodeGenType) {
+
+  case CustomCodeGen::VLD24: {
+    llvm::SmallVector<Value *, 4> Ops;
+    llvm::SmallVector<llvm::Type *, 4> Tys;
+
+    auto MvecCType = E->getType();
+    auto MvecLType = ConvertType(MvecCType);
+    assert(MvecLType->isStructTy() &&
+           "Return type for vld[24]q should be a struct");
+    assert(MvecLType->getStructNumElements() == 1 &&
+           "Return-type struct for vld[24]q should have one element");
+    auto MvecLTypeInner = MvecLType->getStructElementType(0);
+    assert(MvecLTypeInner->isArrayTy() &&
+           "Return-type struct for vld[24]q should contain an array");
+    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
+           "Array member of return-type struct vld[24]q has wrong length");
+    auto VecLType = MvecLTypeInner->getArrayElementType();
+
+    Tys.push_back(VecLType);
+
+    auto Addr = E->getArg(0);
+    Ops.push_back(EmitScalarExpr(Addr));
+    Tys.push_back(ConvertType(Addr->getType()));
+
+    Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
+    Value *LoadResult = Builder.CreateCall(F, Ops);
+    Value *MvecOut = UndefValue::get(MvecLType);
+    for (unsigned i = 0; i < NumVectors; ++i) {
+      Value *Vec = Builder.CreateExtractValue(LoadResult, i);
+      MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
+    }
+
+    if (ReturnValue.isNull())
+      return MvecOut;
+    else
+      return Builder.CreateStore(MvecOut, ReturnValue.getValue());
+  }
+
+  case CustomCodeGen::VST24: {
+    llvm::SmallVector<Value *, 4> Ops;
+    llvm::SmallVector<llvm::Type *, 4> Tys;
+
+    auto Addr = E->getArg(0);
+    Ops.push_back(EmitScalarExpr(Addr));
+    Tys.push_back(ConvertType(Addr->getType()));
+
+    auto MvecCType = E->getArg(1)->getType();
+    auto MvecLType = ConvertType(MvecCType);
+    assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
+    assert(MvecLType->getStructNumElements() == 1 &&
+           "Data-type struct for vst2q should have one element");
+    auto MvecLTypeInner = MvecLType->getStructElementType(0);
+    assert(MvecLTypeInner->isArrayTy() &&
+           "Data-type struct for vst2q should contain an array");
+    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
+           "Array member of return-type struct vld[24]q has wrong length");
+    auto VecLType = MvecLTypeInner->getArrayElementType();
+
+    Tys.push_back(VecLType);
+
+    AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
+    EmitAggExpr(E->getArg(1), MvecSlot);
+    auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
+    for (unsigned i = 0; i < NumVectors; i++)
+      Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
+
+    Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
+    Value *ToReturn = nullptr;
+    for (unsigned i = 0; i < NumVectors; i++) {
+      Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
+      ToReturn = Builder.CreateCall(F, Ops);
+      Ops.pop_back();
+    }
+    return ToReturn;
+  }
+
+  default:
+    llvm_unreachable("bad CustomCodegen enum value");
+  }
+}
+
 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
                                         const CallExpr *E,
                                         SmallVectorImpl<Value *> &Ops,