Diffstat (limited to 'clang/lib')
-rw-r--r--   clang/lib/AST/ItaniumMangle.cpp     |    8
-rw-r--r--   clang/lib/Basic/Targets.cpp         |  315
-rw-r--r--   clang/lib/CodeGen/CGBuiltin.cpp     | 1307
-rw-r--r--   clang/lib/CodeGen/CodeGenFunction.h |    2
-rw-r--r--   clang/lib/CodeGen/TargetInfo.cpp    |  221
-rw-r--r--   clang/lib/Driver/Tools.cpp          |   22
-rw-r--r--   clang/lib/Sema/SemaChecking.cpp     |   15
7 files changed, 32 insertions, 1858 deletions
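For context on the first hunk below: before this change, ItaniumMangle.cpp used AArch64-style NEON vector mangling for aarch64, aarch64_be and arm64_be unconditionally, and for arm64 only on non-Darwin targets; afterwards all four arch values share the single non-Darwin check. A minimal sketch of the old and new predicates, using a hypothetical standalone helper name (usesAArch64NeonMangling) rather than anything that exists in the tree:

    // Hypothetical predicate, for illustration only.
    static bool usesAArch64NeonMangling(llvm::Triple::ArchType Arch,
                                        const llvm::Triple &Target) {
      // Old form: only arm64 was gated on !Target.isOSDarwin():
      //   return Arch == llvm::Triple::aarch64 ||
      //          Arch == llvm::Triple::aarch64_be ||
      //          Arch == llvm::Triple::arm64_be ||
      //          (Arch == llvm::Triple::arm64 && !Target.isOSDarwin());
      // New form: the Darwin check covers every 64-bit ARM arch value.
      return (Arch == llvm::Triple::aarch64 ||
              Arch == llvm::Triple::aarch64_be ||
              Arch == llvm::Triple::arm64_be ||
              Arch == llvm::Triple::arm64) &&
             !Target.isOSDarwin();
    }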
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index f1a16b50337..58a44b7645a 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2300,10 +2300,10 @@ void CXXNameMangler::mangleType(const VectorType *T) { llvm::Triple Target = getASTContext().getTargetInfo().getTriple(); llvm::Triple::ArchType Arch = getASTContext().getTargetInfo().getTriple().getArch(); - if (Arch == llvm::Triple::aarch64 || - Arch == llvm::Triple::aarch64_be || - Arch == llvm::Triple::arm64_be || - (Arch == llvm::Triple::arm64 && !Target.isOSDarwin())) + if ((Arch == llvm::Triple::aarch64 || + Arch == llvm::Triple::aarch64_be || + Arch == llvm::Triple::arm64_be || + Arch == llvm::Triple::arm64) && !Target.isOSDarwin()) mangleAArch64NeonVectorType(T); else mangleNeonVectorType(T); diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index 62d44be9deb..82d79f7a153 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -3408,289 +3408,6 @@ public: }; } -namespace { -class AArch64TargetInfo : public TargetInfo { - virtual void setDescriptionString() = 0; - static const char * const GCCRegNames[]; - static const TargetInfo::GCCRegAlias GCCRegAliases[]; - - enum FPUModeEnum { - FPUMode, - NeonMode - }; - - unsigned FPU; - unsigned CRC; - unsigned Crypto; - static const Builtin::Info BuiltinInfo[]; - -public: - AArch64TargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) { - LongWidth = LongAlign = 64; - LongDoubleWidth = LongDoubleAlign = 128; - PointerWidth = PointerAlign = 64; - SuitableAlign = 128; - - WCharType = UnsignedInt; - if (getTriple().getOS() == llvm::Triple::NetBSD) { - WCharType = SignedInt; - Int64Type = SignedLongLong; - IntMaxType = SignedLongLong; - UIntMaxType = UnsignedLongLong; - } else { - WCharType = UnsignedInt; - Int64Type = SignedLong; - IntMaxType = SignedLong; - UIntMaxType = UnsignedLong; - } - LongDoubleFormat = &llvm::APFloat::IEEEquad; - - // AArch64 backend supports 64-bit operations at the moment. In principle - // 128-bit is possible if register-pairs are used. - MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; - - TheCXXABI.set(TargetCXXABI::GenericAArch64); - } - void getTargetDefines(const LangOptions &Opts, - MacroBuilder &Builder) const override { - // GCC defines theses currently - Builder.defineMacro("__aarch64__"); - - // ACLE predefines. Many can only have one possible value on v8 AArch64. - Builder.defineMacro("__ARM_ACLE", "200"); - Builder.defineMacro("__ARM_ARCH", "8"); - Builder.defineMacro("__ARM_ARCH_PROFILE", "'A'"); - - Builder.defineMacro("__ARM_64BIT_STATE"); - Builder.defineMacro("__ARM_PCS_AAPCS64"); - Builder.defineMacro("__ARM_ARCH_ISA_A64"); - - Builder.defineMacro("__ARM_FEATURE_UNALIGNED"); - Builder.defineMacro("__ARM_FEATURE_CLZ"); - Builder.defineMacro("__ARM_FEATURE_FMA"); - Builder.defineMacro("__ARM_FEATURE_DIV"); - - Builder.defineMacro("__ARM_ALIGN_MAX_STACK_PWR", "4"); - - // 0xe implies support for half, single and double precision operations. - Builder.defineMacro("__ARM_FP", "0xe"); - - // PCS specifies this for SysV variants, which is all we support. Other ABIs - // may choose __ARM_FP16_FORMAT_ALTERNATIVE. - Builder.defineMacro("__ARM_FP16_FORMAT_IEEE"); - - if (Opts.FastMath || Opts.FiniteMathOnly) - Builder.defineMacro("__ARM_FP_FAST"); - - if ((Opts.C99 || Opts.C11) && !Opts.Freestanding) - Builder.defineMacro("__ARM_FP_FENV_ROUNDING"); - - Builder.defineMacro("__ARM_SIZEOF_WCHAR_T", - Opts.ShortWChar ? 
"2" : "4"); - - Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", - Opts.ShortEnums ? "1" : "4"); - - if (FPU == NeonMode) { - Builder.defineMacro("__ARM_NEON"); - // 64-bit NEON supports half, single and double precision operations. - Builder.defineMacro("__ARM_NEON_FP", "0xe"); - } - - if (CRC) - Builder.defineMacro("__ARM_FEATURE_CRC32"); - - if (Crypto) { - Builder.defineMacro("__ARM_FEATURE_CRYPTO"); - } - } - void getTargetBuiltins(const Builtin::Info *&Records, - unsigned &NumRecords) const override { - Records = BuiltinInfo; - NumRecords = clang::AArch64::LastTSBuiltin-Builtin::FirstTSBuiltin; - } - bool hasFeature(StringRef Feature) const override { - return Feature == "aarch64" || (Feature == "neon" && FPU == NeonMode); - } - - bool setCPU(const std::string &Name) override { - return llvm::StringSwitch<bool>(Name) - .Case("generic", true) - .Cases("cortex-a53", "cortex-a57", true) - .Default(false); - } - - bool handleTargetFeatures(std::vector<std::string> &Features, - DiagnosticsEngine &Diags) override { - FPU = FPUMode; - CRC = 0; - Crypto = 0; - for (unsigned i = 0, e = Features.size(); i != e; ++i) { - if (Features[i] == "+neon") - FPU = NeonMode; - if (Features[i] == "+crc") - CRC = 1; - if (Features[i] == "+crypto") - Crypto = 1; - } - - setDescriptionString(); - - return true; - } - - void getGCCRegNames(const char *const *&Names, - unsigned &NumNames) const override; - void getGCCRegAliases(const GCCRegAlias *&Aliases, - unsigned &NumAliases) const override; - - bool isCLZForZeroUndef() const override { return false; } - - bool validateAsmConstraint(const char *&Name, - TargetInfo::ConstraintInfo &Info) const override { - switch (*Name) { - default: return false; - case 'w': // An FP/SIMD vector register - Info.setAllowsRegister(); - return true; - case 'I': // Constant that can be used with an ADD instruction - case 'J': // Constant that can be used with a SUB instruction - case 'K': // Constant that can be used with a 32-bit logical instruction - case 'L': // Constant that can be used with a 64-bit logical instruction - case 'M': // Constant that can be used as a 32-bit MOV immediate - case 'N': // Constant that can be used as a 64-bit MOV immediate - case 'Y': // Floating point constant zero - case 'Z': // Integer constant zero - return true; - case 'Q': // A memory reference with base register and no offset - Info.setAllowsMemory(); - return true; - case 'S': // A symbolic address - Info.setAllowsRegister(); - return true; - case 'U': - // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, whatever they may be - // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be - // Usa: An absolute symbolic address - // Ush: The high part (bits 32:12) of a pc-relative symbolic address - llvm_unreachable("FIXME: Unimplemented support for bizarre constraints"); - } - } - - const char *getClobbers() const override { - // There are no AArch64 clobbers shared by all asm statements. 
- return ""; - } - - BuiltinVaListKind getBuiltinVaListKind() const override { - return TargetInfo::AArch64ABIBuiltinVaList; - } -}; - -const char * const AArch64TargetInfo::GCCRegNames[] = { - "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", - "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", - "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", - "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wsp", "wzr", - - "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", - "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", - "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", - "x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp", "xzr", - - "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", - "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15", - "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23", - "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31", - - "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7", - "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15", - "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23", - "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31", - - "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", - "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", - "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", - "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", - - "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", - "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", - "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", - "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", - - "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", - "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23", - "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" -}; - -void AArch64TargetInfo::getGCCRegNames(const char * const *&Names, - unsigned &NumNames) const { - Names = GCCRegNames; - NumNames = llvm::array_lengthof(GCCRegNames); -} - -const TargetInfo::GCCRegAlias AArch64TargetInfo::GCCRegAliases[] = { - { { "x16" }, "ip0"}, - { { "x17" }, "ip1"}, - { { "x29" }, "fp" }, - { { "x30" }, "lr" } -}; - -void AArch64TargetInfo::getGCCRegAliases(const GCCRegAlias *&Aliases, - unsigned &NumAliases) const { - Aliases = GCCRegAliases; - NumAliases = llvm::array_lengthof(GCCRegAliases); - -} - -const Builtin::Info AArch64TargetInfo::BuiltinInfo[] = { -#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES }, -#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\ - ALL_LANGUAGES }, -#include "clang/Basic/BuiltinsNEON.def" - -#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES }, -#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\ - ALL_LANGUAGES }, -#include "clang/Basic/BuiltinsAArch64.def" -}; - -class AArch64leTargetInfo : public AArch64TargetInfo { - void setDescriptionString() override { - DescriptionString = "e-m:e-i64:64-i128:128-n32:64-S128"; - } - -public: - AArch64leTargetInfo(const llvm::Triple &Triple) - : AArch64TargetInfo(Triple) { - BigEndian = false; - } - void getTargetDefines(const LangOptions &Opts, - MacroBuilder &Builder) const override { - Builder.defineMacro("__AARCH64EL__"); - AArch64TargetInfo::getTargetDefines(Opts, Builder); - } -}; - -class AArch64beTargetInfo : public AArch64TargetInfo { - void setDescriptionString() override { - DescriptionString = "E-m:e-i64:64-i128:128-n32:64-S128"; - } - -public: - AArch64beTargetInfo(const llvm::Triple &Triple) - : AArch64TargetInfo(Triple) { } - void getTargetDefines(const 
LangOptions &Opts, - MacroBuilder &Builder) const override { - Builder.defineMacro("__AARCH64EB__"); - Builder.defineMacro("__AARCH_BIG_ENDIAN"); - Builder.defineMacro("__ARM_BIG_ENDIAN"); - AArch64TargetInfo::getTargetDefines(Opts, Builder); - } -}; - -} // end anonymous namespace namespace { class ARMTargetInfo : public TargetInfo { @@ -4537,11 +4254,23 @@ class ARM64TargetInfo : public TargetInfo { public: ARM64TargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple), ABI("aapcs") { + + if (getTriple().getOS() == llvm::Triple::NetBSD) { + WCharType = SignedInt; + + // NetBSD apparently prefers consistency across ARM targets to consistency + // across 64-bit targets. + Int64Type = SignedLongLong; + IntMaxType = SignedLongLong; + UIntMaxType = UnsignedLongLong; + } else { + WCharType = UnsignedInt; + Int64Type = SignedLong; + IntMaxType = SignedLong; + UIntMaxType = UnsignedLong; + } + LongWidth = LongAlign = PointerWidth = PointerAlign = 64; - IntMaxType = SignedLong; - UIntMaxType = UnsignedLong; - Int64Type = SignedLong; - WCharType = UnsignedInt; MaxVectorAlign = 128; RegParmMax = 8; MaxAtomicInlineWidth = 128; @@ -6218,21 +5947,21 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple) { case llvm::Triple::aarch64: switch (os) { case llvm::Triple::Linux: - return new LinuxTargetInfo<AArch64leTargetInfo>(Triple); + return new LinuxTargetInfo<ARM64leTargetInfo>(Triple); case llvm::Triple::NetBSD: - return new NetBSDTargetInfo<AArch64leTargetInfo>(Triple); + return new NetBSDTargetInfo<ARM64leTargetInfo>(Triple); default: - return new AArch64leTargetInfo(Triple); + return new ARM64leTargetInfo(Triple); } case llvm::Triple::aarch64_be: switch (os) { case llvm::Triple::Linux: - return new LinuxTargetInfo<AArch64beTargetInfo>(Triple); + return new LinuxTargetInfo<ARM64beTargetInfo>(Triple); case llvm::Triple::NetBSD: - return new NetBSDTargetInfo<AArch64beTargetInfo>(Triple); + return new NetBSDTargetInfo<ARM64beTargetInfo>(Triple); default: - return new AArch64beTargetInfo(Triple); + return new ARM64beTargetInfo(Triple); } case llvm::Triple::arm: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9d692d8e1e2..585db1778bf 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1637,14 +1637,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (getTarget().getTriple().getArch()) { - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - return EmitAArch64BuiltinExpr(BuiltinID, E); case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: return EmitARMBuiltinExpr(BuiltinID, E); + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: case llvm::Triple::arm64: case llvm::Triple::arm64_be: return EmitARM64BuiltinExpr(BuiltinID, E); @@ -1883,354 +1882,6 @@ enum { Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ #NameBase, TypeModifier } -static const NeonIntrinsicInfo AArch64SISDIntrinsicInfo[] = { - NEONMAP1(vabdd_f64, aarch64_neon_vabd, AddRetType), - NEONMAP1(vabds_f32, aarch64_neon_vabd, AddRetType), - NEONMAP1(vabsd_s64, aarch64_neon_vabs, 0), - NEONMAP1(vaddd_s64, aarch64_neon_vaddds, 0), - NEONMAP1(vaddd_u64, aarch64_neon_vadddu, 0), - NEONMAP1(vaddlv_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_s8, aarch64_neon_saddlv, VectorRet | 
Add1ArgType), - NEONMAP1(vaddlv_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vaddv_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vaddvq_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vaddvq_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_s64, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u64, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vcaged_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcages_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcagtd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vcagts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vcaled_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcales_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcaltd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vcalts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vceqd_f64, aarch64_neon_fceq, VectorRet | Add2ArgTypes), - NEONMAP1(vceqd_s64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqd_u64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqs_f32, aarch64_neon_fceq, VectorRet | Add2ArgTypes), - NEONMAP1(vceqzd_f64, aarch64_neon_fceq, FpCmpzModifiers), - NEONMAP1(vceqzd_s64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqzd_u64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqzs_f32, aarch64_neon_fceq, FpCmpzModifiers), - NEONMAP1(vcged_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vcged_s64, aarch64_neon_vcge, VectorRetGetArgs01), - NEONMAP1(vcged_u64, aarch64_neon_vchs, VectorRetGetArgs01), - NEONMAP1(vcges_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vcgezd_f64, aarch64_neon_fcge, FpCmpzModifiers), - NEONMAP1(vcgezd_s64, aarch64_neon_vcge, VectorRetGetArgs01), - NEONMAP1(vcgezs_f32, aarch64_neon_fcge, FpCmpzModifiers), - NEONMAP1(vcgtd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcgtd_s64, aarch64_neon_vcgt, VectorRetGetArgs01), - NEONMAP1(vcgtd_u64, aarch64_neon_vchi, VectorRetGetArgs01), - NEONMAP1(vcgts_f32, 
aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcgtzd_f64, aarch64_neon_fcgt, FpCmpzModifiers), - NEONMAP1(vcgtzd_s64, aarch64_neon_vcgt, VectorRetGetArgs01), - NEONMAP1(vcgtzs_f32, aarch64_neon_fcgt, FpCmpzModifiers), - NEONMAP1(vcled_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vcled_s64, aarch64_neon_vcge, VectorRetGetArgs01), - NEONMAP1(vcled_u64, aarch64_neon_vchs, VectorRetGetArgs01), - NEONMAP1(vcles_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vclezd_f64, aarch64_neon_fclez, FpCmpzModifiers), - NEONMAP1(vclezd_s64, aarch64_neon_vclez, VectorRetGetArgs01), - NEONMAP1(vclezs_f32, aarch64_neon_fclez, FpCmpzModifiers), - NEONMAP1(vcltd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcltd_s64, aarch64_neon_vcgt, VectorRetGetArgs01), - NEONMAP1(vcltd_u64, aarch64_neon_vchi, VectorRetGetArgs01), - NEONMAP1(vclts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcltzd_f64, aarch64_neon_fcltz, FpCmpzModifiers), - NEONMAP1(vcltzd_s64, aarch64_neon_vcltz, VectorRetGetArgs01), - NEONMAP1(vcltzs_f32, aarch64_neon_fcltz, FpCmpzModifiers), - NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, VectorRet | Add1ArgType), - NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, VectorRet | Add1ArgType), - NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, VectorRet | Add1ArgType), - NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_f64_s64, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_f64_u64, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, VectorRet | Add1ArgType), - NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, VectorRet | Add1ArgType), - NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, VectorRet | Add1ArgType), - NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, VectorRet | Add1ArgType), - NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, VectorRet | Add1ArgType), - NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, VectorRet | Add1ArgType), - NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, VectorRet | Add1ArgType), - NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, VectorRet | Add1ArgType), - NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, VectorRet | Add1ArgType), - NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, VectorRet | Add1ArgType), - NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, VectorRet | Add1ArgType), - NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, VectorRet | Add1ArgType), - NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, VectorRet | Add1ArgType), - NEONMAP1(vcvts_f32_s32, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_f32_u32, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType), - NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType), - NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, VectorRet | Add1ArgType), - 
NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, VectorRet | Add1ArgType), - NEONMAP1(vcvtxd_f32_f64, aarch64_neon_fcvtxn, 0), - NEONMAP0(vdupb_lane_i8), - NEONMAP0(vdupb_laneq_i8), - NEONMAP0(vdupd_lane_f64), - NEONMAP0(vdupd_lane_i64), - NEONMAP0(vdupd_laneq_f64), - NEONMAP0(vdupd_laneq_i64), - NEONMAP0(vduph_lane_i16), - NEONMAP0(vduph_laneq_i16), - NEONMAP0(vdups_lane_f32), - NEONMAP0(vdups_lane_i32), - NEONMAP0(vdups_laneq_f32), - NEONMAP0(vdups_laneq_i32), - NEONMAP0(vfmad_lane_f64), - NEONMAP0(vfmad_laneq_f64), - NEONMAP0(vfmas_lane_f32), - NEONMAP0(vfmas_laneq_f32), - NEONMAP0(vget_lane_f32), - NEONMAP0(vget_lane_f64), - NEONMAP0(vget_lane_i16), - NEONMAP0(vget_lane_i32), - NEONMAP0(vget_lane_i64), - NEONMAP0(vget_lane_i8), - NEONMAP0(vgetq_lane_f32), - NEONMAP0(vgetq_lane_f64), - NEONMAP0(vgetq_lane_i16), - NEONMAP0(vgetq_lane_i32), - NEONMAP0(vgetq_lane_i64), - NEONMAP0(vgetq_lane_i8), - NEONMAP1(vmaxnmv_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vmaxnmvq_f32, aarch64_neon_vmaxnmv, 0), - NEONMAP1(vmaxnmvq_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vmaxv_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vmaxv_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_f32, aarch64_neon_vmaxv, 0), - NEONMAP1(vmaxvq_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vmaxvq_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vminnmv_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vminnmvq_f32, aarch64_neon_vminnmv, 0), - NEONMAP1(vminnmvq_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vminv_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vminv_s16, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_s32, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_s8, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_u16, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_u32, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_u8, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_f32, aarch64_neon_vminv, 0), - NEONMAP1(vminvq_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vminvq_s16, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_s32, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_s8, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_u16, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_u32, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_u8, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP0(vmul_n_f64), - NEONMAP1(vmull_p64, aarch64_neon_vmull_p64, 0), - NEONMAP0(vmulxd_f64), - NEONMAP0(vmulxs_f32), - NEONMAP1(vnegd_s64, aarch64_neon_vneg, 0), - NEONMAP1(vpaddd_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - 
NEONMAP1(vpaddd_s64, aarch64_neon_vpadd, 0), - NEONMAP1(vpaddd_u64, aarch64_neon_vpadd, 0), - NEONMAP1(vpadds_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vpmaxnmqd_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vpmaxnms_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vpmaxqd_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vpmaxs_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vpminnmqd_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vpminnms_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vpminqd_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vpmins_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vqabsb_s8, arm_neon_vqabs, VectorRet), - NEONMAP1(vqabsd_s64, arm_neon_vqabs, VectorRet), - NEONMAP1(vqabsh_s16, arm_neon_vqabs, VectorRet), - NEONMAP1(vqabss_s32, arm_neon_vqabs, VectorRet), - NEONMAP1(vqaddb_s8, arm_neon_vqadds, VectorRet), - NEONMAP1(vqaddb_u8, arm_neon_vqaddu, VectorRet), - NEONMAP1(vqaddd_s64, arm_neon_vqadds, VectorRet), - NEONMAP1(vqaddd_u64, arm_neon_vqaddu, VectorRet), - NEONMAP1(vqaddh_s16, arm_neon_vqadds, VectorRet), - NEONMAP1(vqaddh_u16, arm_neon_vqaddu, VectorRet), - NEONMAP1(vqadds_s32, arm_neon_vqadds, VectorRet), - NEONMAP1(vqadds_u32, arm_neon_vqaddu, VectorRet), - NEONMAP0(vqdmlalh_lane_s16), - NEONMAP0(vqdmlalh_laneq_s16), - NEONMAP1(vqdmlalh_s16, aarch64_neon_vqdmlal, VectorRet), - NEONMAP0(vqdmlals_lane_s32), - NEONMAP0(vqdmlals_laneq_s32), - NEONMAP1(vqdmlals_s32, aarch64_neon_vqdmlal, VectorRet), - NEONMAP0(vqdmlslh_lane_s16), - NEONMAP0(vqdmlslh_laneq_s16), - NEONMAP1(vqdmlslh_s16, aarch64_neon_vqdmlsl, VectorRet), - NEONMAP0(vqdmlsls_lane_s32), - NEONMAP0(vqdmlsls_laneq_s32), - NEONMAP1(vqdmlsls_s32, aarch64_neon_vqdmlsl, VectorRet), - NEONMAP1(vqdmulhh_s16, arm_neon_vqdmulh, VectorRet), - NEONMAP1(vqdmulhs_s32, arm_neon_vqdmulh, VectorRet), - NEONMAP1(vqdmullh_s16, arm_neon_vqdmull, VectorRet), - NEONMAP1(vqdmulls_s32, arm_neon_vqdmull, VectorRet), - NEONMAP1(vqmovnd_s64, arm_neon_vqmovns, VectorRet), - NEONMAP1(vqmovnd_u64, arm_neon_vqmovnu, VectorRet), - NEONMAP1(vqmovnh_s16, arm_neon_vqmovns, VectorRet), - NEONMAP1(vqmovnh_u16, arm_neon_vqmovnu, VectorRet), - NEONMAP1(vqmovns_s32, arm_neon_vqmovns, VectorRet), - NEONMAP1(vqmovns_u32, arm_neon_vqmovnu, VectorRet), - NEONMAP1(vqmovund_s64, arm_neon_vqmovnsu, VectorRet), - NEONMAP1(vqmovunh_s16, arm_neon_vqmovnsu, VectorRet), - NEONMAP1(vqmovuns_s32, arm_neon_vqmovnsu, VectorRet), - NEONMAP1(vqnegb_s8, arm_neon_vqneg, VectorRet), - NEONMAP1(vqnegd_s64, arm_neon_vqneg, VectorRet), - NEONMAP1(vqnegh_s16, arm_neon_vqneg, VectorRet), - NEONMAP1(vqnegs_s32, arm_neon_vqneg, VectorRet), - NEONMAP1(vqrdmulhh_s16, arm_neon_vqrdmulh, VectorRet), - NEONMAP1(vqrdmulhs_s32, arm_neon_vqrdmulh, VectorRet), - NEONMAP1(vqrshlb_s8, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshlb_u8, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshld_s64, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshld_u64, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshlh_s16, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshlh_u16, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshls_s32, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshls_u32, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshrnd_n_s64, aarch64_neon_vsqrshrn, VectorRet), - NEONMAP1(vqrshrnd_n_u64, aarch64_neon_vuqrshrn, VectorRet), - NEONMAP1(vqrshrnh_n_s16, aarch64_neon_vsqrshrn, VectorRet), - 
NEONMAP1(vqrshrnh_n_u16, aarch64_neon_vuqrshrn, VectorRet), - NEONMAP1(vqrshrns_n_s32, aarch64_neon_vsqrshrn, VectorRet), - NEONMAP1(vqrshrns_n_u32, aarch64_neon_vuqrshrn, VectorRet), - NEONMAP1(vqrshrund_n_s64, aarch64_neon_vsqrshrun, VectorRet), - NEONMAP1(vqrshrunh_n_s16, aarch64_neon_vsqrshrun, VectorRet), - NEONMAP1(vqrshruns_n_s32, aarch64_neon_vsqrshrun, VectorRet), - NEONMAP1(vqshlb_n_s8, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshlb_n_u8, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshlb_s8, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshlb_u8, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshld_n_s64, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshld_n_u64, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshld_s64, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshld_u64, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshlh_n_s16, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshlh_n_u16, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshlh_s16, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshlh_u16, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshls_n_s32, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshls_n_u32, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshls_s32, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshls_u32, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshlub_n_s8, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshlud_n_s64, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshluh_n_s16, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshlus_n_s32, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshrnd_n_s64, aarch64_neon_vsqshrn, VectorRet), - NEONMAP1(vqshrnd_n_u64, aarch64_neon_vuqshrn, VectorRet), - NEONMAP1(vqshrnh_n_s16, aarch64_neon_vsqshrn, VectorRet), - NEONMAP1(vqshrnh_n_u16, aarch64_neon_vuqshrn, VectorRet), - NEONMAP1(vqshrns_n_s32, aarch64_neon_vsqshrn, VectorRet), - NEONMAP1(vqshrns_n_u32, aarch64_neon_vuqshrn, VectorRet), - NEONMAP1(vqshrund_n_s64, aarch64_neon_vsqshrun, VectorRet), - NEONMAP1(vqshrunh_n_s16, aarch64_neon_vsqshrun, VectorRet), - NEONMAP1(vqshruns_n_s32, aarch64_neon_vsqshrun, VectorRet), - NEONMAP1(vqsubb_s8, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubb_u8, arm_neon_vqsubu, VectorRet), - NEONMAP1(vqsubd_s64, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubd_u64, arm_neon_vqsubu, VectorRet), - NEONMAP1(vqsubh_s16, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubh_u16, arm_neon_vqsubu, VectorRet), - NEONMAP1(vqsubs_s32, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubs_u32, arm_neon_vqsubu, VectorRet), - NEONMAP1(vrecped_f64, aarch64_neon_vrecpe, AddRetType), - NEONMAP1(vrecpes_f32, aarch64_neon_vrecpe, AddRetType), - NEONMAP1(vrecpsd_f64, aarch64_neon_vrecps, AddRetType), - NEONMAP1(vrecpss_f32, aarch64_neon_vrecps, AddRetType), - NEONMAP1(vrecpxd_f64, aarch64_neon_vrecpx, AddRetType), - NEONMAP1(vrecpxs_f32, aarch64_neon_vrecpx, AddRetType), - NEONMAP1(vrshld_s64, aarch64_neon_vrshlds, 0), - NEONMAP1(vrshld_u64, aarch64_neon_vrshldu, 0), - NEONMAP1(vrshrd_n_s64, aarch64_neon_vsrshr, VectorRet), - NEONMAP1(vrshrd_n_u64, aarch64_neon_vurshr, VectorRet), - NEONMAP1(vrsqrted_f64, aarch64_neon_vrsqrte, AddRetType), - NEONMAP1(vrsqrtes_f32, aarch64_neon_vrsqrte, AddRetType), - NEONMAP1(vrsqrtsd_f64, aarch64_neon_vrsqrts, AddRetType), - NEONMAP1(vrsqrtss_f32, aarch64_neon_vrsqrts, AddRetType), - NEONMAP1(vrsrad_n_s64, aarch64_neon_vrsrads_n, 0), - NEONMAP1(vrsrad_n_u64, aarch64_neon_vrsradu_n, 0), - NEONMAP0(vset_lane_f32), - NEONMAP0(vset_lane_f64), - NEONMAP0(vset_lane_i16), - NEONMAP0(vset_lane_i32), - NEONMAP0(vset_lane_i64), - 
NEONMAP0(vset_lane_i8), - NEONMAP0(vsetq_lane_f32), - NEONMAP0(vsetq_lane_f64), - NEONMAP0(vsetq_lane_i16), - NEONMAP0(vsetq_lane_i32), - NEONMAP0(vsetq_lane_i64), - NEONMAP0(vsetq_lane_i8), - NEONMAP1(vsha1cq_u32, arm_neon_sha1c, 0), - NEONMAP1(vsha1h_u32, arm_neon_sha1h, 0), - NEONMAP1(vsha1mq_u32, arm_neon_sha1m, 0), - NEONMAP1(vsha1pq_u32, arm_neon_sha1p, 0), - NEONMAP1(vshld_n_s64, aarch64_neon_vshld_n, 0), - NEONMAP1(vshld_n_u64, aarch64_neon_vshld_n, 0), - NEONMAP1(vshld_s64, aarch64_neon_vshlds, 0), - NEONMAP1(vshld_u64, aarch64_neon_vshldu, 0), - NEONMAP1(vshrd_n_s64, aarch64_neon_vshrds_n, 0), - NEONMAP1(vshrd_n_u64, aarch64_neon_vshrdu_n, 0), - NEONMAP1(vslid_n_s64, aarch64_neon_vsli, VectorRet), - NEONMAP1(vslid_n_u64, aarch64_neon_vsli, VectorRet), - NEONMAP1(vsqaddb_u8, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsqaddd_u64, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsqaddh_u16, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsqadds_u32, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsrad_n_s64, aarch64_neon_vsrads_n, 0), - NEONMAP1(vsrad_n_u64, aarch64_neon_vsradu_n, 0), - NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, VectorRet), - NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, VectorRet), - NEONMAP1(vsubd_s64, aarch64_neon_vsubds, 0), - NEONMAP1(vsubd_u64, aarch64_neon_vsubdu, 0), - NEONMAP1(vtstd_s64, aarch64_neon_vtstd, VectorRetGetArgs01), - NEONMAP1(vtstd_u64, aarch64_neon_vtstd, VectorRetGetArgs01), - NEONMAP1(vuqaddb_s8, aarch64_neon_vuqadd, VectorRet), - NEONMAP1(vuqaddd_s64, aarch64_neon_vuqadd, VectorRet), - NEONMAP1(vuqaddh_s16, aarch64_neon_vuqadd, VectorRet), - NEONMAP1(vuqadds_s32, aarch64_neon_vuqadd, VectorRet) -}; - static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), @@ -2739,7 +2390,6 @@ static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = { #undef NEONMAP2 static bool NEONSIMDIntrinsicsProvenSorted = false; -static bool AArch64SISDIntrinsicInfoProvenSorted = false; static bool ARM64SIMDIntrinsicsProvenSorted = false; static bool ARM64SISDIntrinsicsProvenSorted = false; @@ -2869,169 +2519,6 @@ static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, return CGF.Builder.CreateBitCast(Result, ResultType, s); } -static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, - const NeonIntrinsicInfo &SISDInfo, - const CallExpr *E) { - unsigned BuiltinID = SISDInfo.BuiltinID; - unsigned int Int = SISDInfo.LLVMIntrinsic; - const char *s = SISDInfo.NameHint; - - SmallVector<Value *, 4> Ops; - for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { - Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); - } - - // AArch64 scalar builtins are not overloaded, they do not have an extra - // argument that specifies the vector type, need to handle each case. 
- switch (BuiltinID) { - default: break; - case NEON::BI__builtin_neon_vdups_lane_f32: - case NEON::BI__builtin_neon_vdupd_lane_f64: - case NEON::BI__builtin_neon_vdups_laneq_f32: - case NEON::BI__builtin_neon_vdupd_laneq_f64: { - return CGF.Builder.CreateExtractElement(Ops[0], Ops[1], "vdup_lane"); - } - case NEON::BI__builtin_neon_vdupb_lane_i8: - case NEON::BI__builtin_neon_vduph_lane_i16: - case NEON::BI__builtin_neon_vdups_lane_i32: - case NEON::BI__builtin_neon_vdupd_lane_i64: - case NEON::BI__builtin_neon_vdupb_laneq_i8: - case NEON::BI__builtin_neon_vduph_laneq_i16: - case NEON::BI__builtin_neon_vdups_laneq_i32: - case NEON::BI__builtin_neon_vdupd_laneq_i64: { - // The backend treats Neon scalar types as v1ix types - // So we want to dup lane from any vector to v1ix vector - // with shufflevector - s = "vdup_lane"; - Value* SV = llvm::ConstantVector::getSplat(1, cast<ConstantInt>(Ops[1])); - Value *Result = CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], SV, s); - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - // AArch64 intrinsic one-element vector type cast to - // scalar type expected by the builtin - return CGF.Builder.CreateBitCast(Result, Ty, s); - } - case NEON::BI__builtin_neon_vqdmlalh_lane_s16 : - case NEON::BI__builtin_neon_vqdmlalh_laneq_s16 : - case NEON::BI__builtin_neon_vqdmlals_lane_s32 : - case NEON::BI__builtin_neon_vqdmlals_laneq_s32 : - case NEON::BI__builtin_neon_vqdmlslh_lane_s16 : - case NEON::BI__builtin_neon_vqdmlslh_laneq_s16 : - case NEON::BI__builtin_neon_vqdmlsls_lane_s32 : - case NEON::BI__builtin_neon_vqdmlsls_laneq_s32 : { - Int = Intrinsic::arm_neon_vqadds; - if (BuiltinID == NEON::BI__builtin_neon_vqdmlslh_lane_s16 || - BuiltinID == NEON::BI__builtin_neon_vqdmlslh_laneq_s16 || - BuiltinID == NEON::BI__builtin_neon_vqdmlsls_lane_s32 || - BuiltinID == NEON::BI__builtin_neon_vqdmlsls_laneq_s32) { - Int = Intrinsic::arm_neon_vqsubs; - } - // create vqdmull call with b * c[i] - llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType()); - llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1); - Ty = CGF.ConvertType(E->getArg(0)->getType()); - llvm::VectorType *ResVTy = llvm::VectorType::get(Ty, 1); - Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy); - Value *V = UndefValue::get(OpVTy); - llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0); - SmallVector<Value *, 2> MulOps; - MulOps.push_back(Ops[1]); - MulOps.push_back(Ops[2]); - MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI); - MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract"); - MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI); - Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]); - // create vqadds call with a +/- vqdmull result - F = CGF.CGM.getIntrinsic(Int, ResVTy); - SmallVector<Value *, 2> AddOps; - AddOps.push_back(Ops[0]); - AddOps.push_back(MulRes); - V = UndefValue::get(ResVTy); - AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI); - Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]); - return CGF.Builder.CreateBitCast(AddRes, Ty); - } - case NEON::BI__builtin_neon_vfmas_lane_f32: - case NEON::BI__builtin_neon_vfmas_laneq_f32: - case NEON::BI__builtin_neon_vfmad_lane_f64: - case NEON::BI__builtin_neon_vfmad_laneq_f64: { - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - } 
- // Scalar Floating-point Multiply Extended - case NEON::BI__builtin_neon_vmulxs_f32: - case NEON::BI__builtin_neon_vmulxd_f64: { - Int = Intrinsic::aarch64_neon_vmulx; - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - return CGF.EmitNeonCall(CGF.CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); - } - case NEON::BI__builtin_neon_vmul_n_f64: { - // v1f64 vmul_n_f64 should be mapped to Neon scalar mul lane - llvm::Type *VTy = GetNeonType(&CGF, - NeonTypeFlags(NeonTypeFlags::Float64, false, false)); - Ops[0] = CGF.Builder.CreateBitCast(Ops[0], VTy); - llvm::Value *Idx = llvm::ConstantInt::get(CGF.Int32Ty, 0); - Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], Idx, "extract"); - Value *Result = CGF.Builder.CreateFMul(Ops[0], Ops[1]); - return CGF.Builder.CreateBitCast(Result, VTy); - } - case NEON::BI__builtin_neon_vget_lane_i8: - case NEON::BI__builtin_neon_vget_lane_i16: - case NEON::BI__builtin_neon_vget_lane_i32: - case NEON::BI__builtin_neon_vget_lane_i64: - case NEON::BI__builtin_neon_vget_lane_f32: - case NEON::BI__builtin_neon_vget_lane_f64: - case NEON::BI__builtin_neon_vgetq_lane_i8: - case NEON::BI__builtin_neon_vgetq_lane_i16: - case NEON::BI__builtin_neon_vgetq_lane_i32: - case NEON::BI__builtin_neon_vgetq_lane_i64: - case NEON::BI__builtin_neon_vgetq_lane_f32: - case NEON::BI__builtin_neon_vgetq_lane_f64: - return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vget_lane_i8, E); - case NEON::BI__builtin_neon_vset_lane_i8: - case NEON::BI__builtin_neon_vset_lane_i16: - case NEON::BI__builtin_neon_vset_lane_i32: - case NEON::BI__builtin_neon_vset_lane_i64: - case NEON::BI__builtin_neon_vset_lane_f32: - case NEON::BI__builtin_neon_vset_lane_f64: - case NEON::BI__builtin_neon_vsetq_lane_i8: - case NEON::BI__builtin_neon_vsetq_lane_i16: - case NEON::BI__builtin_neon_vsetq_lane_i32: - case NEON::BI__builtin_neon_vsetq_lane_i64: - case NEON::BI__builtin_neon_vsetq_lane_f32: - case NEON::BI__builtin_neon_vsetq_lane_f64: - return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vset_lane_i8, E); - - case NEON::BI__builtin_neon_vceqzd_s64: - case NEON::BI__builtin_neon_vceqzd_u64: - case NEON::BI__builtin_neon_vcgezd_s64: - case NEON::BI__builtin_neon_vcgtzd_s64: - case NEON::BI__builtin_neon_vclezd_s64: - case NEON::BI__builtin_neon_vcltzd_s64: - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - break; - case NEON::BI__builtin_neon_vceqzs_f32: - case NEON::BI__builtin_neon_vceqzd_f64: - case NEON::BI__builtin_neon_vcgezs_f32: - case NEON::BI__builtin_neon_vcgezd_f64: - case NEON::BI__builtin_neon_vcgtzs_f32: - case NEON::BI__builtin_neon_vcgtzd_f64: - case NEON::BI__builtin_neon_vclezs_f32: - case NEON::BI__builtin_neon_vclezd_f64: - case NEON::BI__builtin_neon_vcltzs_f32: - case NEON::BI__builtin_neon_vcltzd_f64: - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); - break; - } - - // It didn't need any handling specific to the AArch64 backend, so defer to - // common code. 
- return EmitCommonNeonSISDBuiltinExpr(CGF, SISDInfo, Ops, E); -} - Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, @@ -3534,796 +3021,6 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, return CGF.EmitNeonCall(TblF, TblOps, Name); } -static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, - unsigned BuiltinID, - const CallExpr *E) { - unsigned int Int = 0; - const char *s = nullptr; - - switch (BuiltinID) { - default: - return nullptr; - case NEON::BI__builtin_neon_vtbl1_v: - case NEON::BI__builtin_neon_vqtbl1_v: - case NEON::BI__builtin_neon_vqtbl1q_v: - case NEON::BI__builtin_neon_vtbl2_v: - case NEON::BI__builtin_neon_vqtbl2_v: - case NEON::BI__builtin_neon_vqtbl2q_v: - case NEON::BI__builtin_neon_vtbl3_v: - case NEON::BI__builtin_neon_vqtbl3_v: - case NEON::BI__builtin_neon_vqtbl3q_v: - case NEON::BI__builtin_neon_vtbl4_v: - case NEON::BI__builtin_neon_vqtbl4_v: - case NEON::BI__builtin_neon_vqtbl4q_v: - case NEON::BI__builtin_neon_vtbx1_v: - case NEON::BI__builtin_neon_vqtbx1_v: - case NEON::BI__builtin_neon_vqtbx1q_v: - case NEON::BI__builtin_neon_vtbx2_v: - case NEON::BI__builtin_neon_vqtbx2_v: - case NEON::BI__builtin_neon_vqtbx2q_v: - case NEON::BI__builtin_neon_vtbx3_v: - case NEON::BI__builtin_neon_vqtbx3_v: - case NEON::BI__builtin_neon_vqtbx3q_v: - case NEON::BI__builtin_neon_vtbx4_v: - case NEON::BI__builtin_neon_vqtbx4_v: - case NEON::BI__builtin_neon_vqtbx4q_v: - break; - } - - assert(E->getNumArgs() >= 3); - - // Get the last argument, which specifies the vector type. - llvm::APSInt Result; - const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) - return nullptr; - - // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(Result.getZExtValue()); - llvm::VectorType *VTy = GetNeonType(&CGF, Type); - llvm::Type *Ty = VTy; - if (!Ty) - return nullptr; - - SmallVector<Value *, 4> Ops; - for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { - Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); - } - - unsigned nElts = VTy->getNumElements(); - - // AArch64 scalar builtins are not overloaded, they do not have an extra - // argument that specifies the vector type, need to handle each case. 
- SmallVector<Value *, 2> TblOps; - switch (BuiltinID) { - case NEON::BI__builtin_neon_vtbl1_v: { - TblOps.push_back(Ops[0]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[1], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - } - case NEON::BI__builtin_neon_vtbl2_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - } - case NEON::BI__builtin_neon_vtbl3_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[3], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - } - case NEON::BI__builtin_neon_vtbl4_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - } - case NEON::BI__builtin_neon_vtbx1_v: { - TblOps.push_back(Ops[1]); - Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - - llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8); - Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight); - Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); - CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); - - SmallVector<Value *, 4> BslOps; - BslOps.push_back(CmpRes); - BslOps.push_back(Ops[0]); - BslOps.push_back(TblRes); - Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); - return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); - } - case NEON::BI__builtin_neon_vtbx2_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty, - Intrinsic::aarch64_neon_vtbx1, "vtbx1"); - } - case NEON::BI__builtin_neon_vtbx3_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - - llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24); - Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour); - Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], - TwentyFourV); - CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); - - SmallVector<Value *, 4> BslOps; - BslOps.push_back(CmpRes); - BslOps.push_back(Ops[0]); - BslOps.push_back(TblRes); - Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); - return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); - } - case NEON::BI__builtin_neon_vtbx4_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - TblOps.push_back(Ops[4]); - return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty, - Intrinsic::aarch64_neon_vtbx2, "vtbx2"); - } - case NEON::BI__builtin_neon_vqtbl1_v: - case NEON::BI__builtin_neon_vqtbl1q_v: - Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break; - case NEON::BI__builtin_neon_vqtbl2_v: - case NEON::BI__builtin_neon_vqtbl2q_v: { - Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break; - case NEON::BI__builtin_neon_vqtbl3_v: - case NEON::BI__builtin_neon_vqtbl3q_v: - Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break; - case NEON::BI__builtin_neon_vqtbl4_v: - case NEON::BI__builtin_neon_vqtbl4q_v: - Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break; - case NEON::BI__builtin_neon_vqtbx1_v: - case NEON::BI__builtin_neon_vqtbx1q_v: - Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; 
break; - case NEON::BI__builtin_neon_vqtbx2_v: - case NEON::BI__builtin_neon_vqtbx2q_v: - Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break; - case NEON::BI__builtin_neon_vqtbx3_v: - case NEON::BI__builtin_neon_vqtbx3q_v: - Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break; - case NEON::BI__builtin_neon_vqtbx4_v: - case NEON::BI__builtin_neon_vqtbx4q_v: - Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break; - } - } - - if (!Int) - return nullptr; - - Function *F = CGF.CGM.getIntrinsic(Int, Ty); - return CGF.EmitNeonCall(F, Ops, s); -} - -Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { - - // Process AArch64 scalar builtins - llvm::ArrayRef<NeonIntrinsicInfo> SISDInfo(AArch64SISDIntrinsicInfo); - const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( - SISDInfo, BuiltinID, AArch64SISDIntrinsicInfoProvenSorted); - - if (Builtin) { - Value *Result = EmitAArch64ScalarBuiltinExpr(*this, *Builtin, E); - assert(Result && "SISD intrinsic should have been handled"); - return Result; - } - - // Process AArch64 table lookup builtins - if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E)) - return Result; - - if (BuiltinID == AArch64::BI__clear_cache) { - assert(E->getNumArgs() == 2 && - "Variadic __clear_cache slipped through on AArch64"); - - const FunctionDecl *FD = E->getDirectCallee(); - SmallVector<Value *, 2> Ops; - for (unsigned i = 0; i < E->getNumArgs(); i++) - Ops.push_back(EmitScalarExpr(E->getArg(i))); - llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); - llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); - StringRef Name = FD->getName(); - return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); - } - - SmallVector<Value *, 4> Ops; - llvm::Value *Align = nullptr; // Alignment for load/store - - if (BuiltinID == NEON::BI__builtin_neon_vldrq_p128) { - Value *Op = EmitScalarExpr(E->getArg(0)); - unsigned addressSpace = - cast<llvm::PointerType>(Op->getType())->getAddressSpace(); - llvm::Type *Ty = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace); - Op = Builder.CreateBitCast(Op, Ty); - Op = Builder.CreateLoad(Op); - Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); - return Builder.CreateBitCast(Op, Ty); - } - if (BuiltinID == NEON::BI__builtin_neon_vstrq_p128) { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - unsigned addressSpace = - cast<llvm::PointerType>(Op0->getType())->getAddressSpace(); - llvm::Type *PTy = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace); - Op0 = Builder.CreateBitCast(Op0, PTy); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - llvm::Type *Ty = llvm::Type::getFP128Ty(getLLVMContext()); - Op1 = Builder.CreateBitCast(Op1, Ty); - return Builder.CreateStore(Op1, Op0); - } - for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { - if (i == 0) { - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld1_v: - case NEON::BI__builtin_neon_vld1q_v: - case NEON::BI__builtin_neon_vst1_v: - case NEON::BI__builtin_neon_vst1q_v: - case NEON::BI__builtin_neon_vst2_v: - case NEON::BI__builtin_neon_vst2q_v: - case NEON::BI__builtin_neon_vst3_v: - case NEON::BI__builtin_neon_vst3q_v: - case NEON::BI__builtin_neon_vst4_v: - case NEON::BI__builtin_neon_vst4q_v: - case NEON::BI__builtin_neon_vst1_x2_v: - case NEON::BI__builtin_neon_vst1q_x2_v: - case NEON::BI__builtin_neon_vst1_x3_v: - case NEON::BI__builtin_neon_vst1q_x3_v: - case NEON::BI__builtin_neon_vst1_x4_v: - case NEON::BI__builtin_neon_vst1q_x4_v: - // Handle ld1/st1 lane in this function a little 
different from ARM. - case NEON::BI__builtin_neon_vld1_lane_v: - case NEON::BI__builtin_neon_vld1q_lane_v: - case NEON::BI__builtin_neon_vst1_lane_v: - case NEON::BI__builtin_neon_vst1q_lane_v: - case NEON::BI__builtin_neon_vst2_lane_v: - case NEON::BI__builtin_neon_vst2q_lane_v: - case NEON::BI__builtin_neon_vst3_lane_v: - case NEON::BI__builtin_neon_vst3q_lane_v: - case NEON::BI__builtin_neon_vst4_lane_v: - case NEON::BI__builtin_neon_vst4q_lane_v: - case NEON::BI__builtin_neon_vld1_dup_v: - case NEON::BI__builtin_neon_vld1q_dup_v: - // Get the alignment for the argument in addition to the value; - // we'll use it later. - std::pair<llvm::Value *, unsigned> Src = - EmitPointerWithAlignment(E->getArg(0)); - Ops.push_back(Src.first); - Align = Builder.getInt32(Src.second); - continue; - } - } - if (i == 1) { - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_v: - case NEON::BI__builtin_neon_vld2q_v: - case NEON::BI__builtin_neon_vld3_v: - case NEON::BI__builtin_neon_vld3q_v: - case NEON::BI__builtin_neon_vld4_v: - case NEON::BI__builtin_neon_vld4q_v: - case NEON::BI__builtin_neon_vld1_x2_v: - case NEON::BI__builtin_neon_vld1q_x2_v: - case NEON::BI__builtin_neon_vld1_x3_v: - case NEON::BI__builtin_neon_vld1q_x3_v: - case NEON::BI__builtin_neon_vld1_x4_v: - case NEON::BI__builtin_neon_vld1q_x4_v: - // Handle ld1/st1 dup lane in this function a little different from ARM. - case NEON::BI__builtin_neon_vld2_dup_v: - case NEON::BI__builtin_neon_vld2q_dup_v: - case NEON::BI__builtin_neon_vld3_dup_v: - case NEON::BI__builtin_neon_vld3q_dup_v: - case NEON::BI__builtin_neon_vld4_dup_v: - case NEON::BI__builtin_neon_vld4q_dup_v: - case NEON::BI__builtin_neon_vld2_lane_v: - case NEON::BI__builtin_neon_vld2q_lane_v: - case NEON::BI__builtin_neon_vld3_lane_v: - case NEON::BI__builtin_neon_vld3q_lane_v: - case NEON::BI__builtin_neon_vld4_lane_v: - case NEON::BI__builtin_neon_vld4q_lane_v: - // Get the alignment for the argument in addition to the value; - // we'll use it later. - std::pair<llvm::Value *, unsigned> Src = - EmitPointerWithAlignment(E->getArg(1)); - Ops.push_back(Src.first); - Align = Builder.getInt32(Src.second); - continue; - } - } - Ops.push_back(EmitScalarExpr(E->getArg(i))); - } - - // Get the last argument, which specifies the vector type. - llvm::APSInt Result; - const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(Result, getContext())) - return nullptr; - - // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(Result.getZExtValue()); - bool usgn = Type.isUnsigned(); - bool quad = Type.isQuad(); - - llvm::VectorType *VTy = GetNeonType(this, Type); - llvm::Type *Ty = VTy; - if (!Ty) - return nullptr; - - // Many NEON builtins have identical semantics and uses in ARM and - // AArch64. Emit these in a single function. - llvm::ArrayRef<NeonIntrinsicInfo> IntrinsicMap(ARMSIMDIntrinsicMap); - Builtin = findNeonIntrinsicInMap(IntrinsicMap, BuiltinID, - NEONSIMDIntrinsicsProvenSorted); - if (Builtin) - return EmitCommonNeonBuiltinExpr( - Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, - Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align); - - unsigned Int; - switch (BuiltinID) { - default: - return nullptr; - - // AArch64 builtins mapping to legacy ARM v7 builtins. - // FIXME: the mapped builtins listed correspond to what has been tested - // in aarch64-neon-intrinsics.c so far. - - // Shift by immediate - case NEON::BI__builtin_neon_vrshr_n_v: - case NEON::BI__builtin_neon_vrshrq_n_v: - Int = usgn ? 
Intrinsic::aarch64_neon_vurshr - : Intrinsic::aarch64_neon_vsrshr; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n"); - case NEON::BI__builtin_neon_vsra_n_v: - if (VTy->getElementType()->isIntegerTy(64)) { - Int = usgn ? Intrinsic::aarch64_neon_vsradu_n - : Intrinsic::aarch64_neon_vsrads_n; - return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vsra_n"); - } - return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsra_n_v, E); - case NEON::BI__builtin_neon_vsraq_n_v: - return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsraq_n_v, E); - case NEON::BI__builtin_neon_vrsra_n_v: - if (VTy->getElementType()->isIntegerTy(64)) { - Int = usgn ? Intrinsic::aarch64_neon_vrsradu_n - : Intrinsic::aarch64_neon_vrsrads_n; - return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vrsra_n"); - } - // fall through - case NEON::BI__builtin_neon_vrsraq_n_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Int = usgn ? Intrinsic::aarch64_neon_vurshr - : Intrinsic::aarch64_neon_vsrshr; - Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); - return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); - } - case NEON::BI__builtin_neon_vqshlu_n_v: - case NEON::BI__builtin_neon_vqshluq_n_v: - Int = Intrinsic::aarch64_neon_vsqshlu; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n"); - case NEON::BI__builtin_neon_vsri_n_v: - case NEON::BI__builtin_neon_vsriq_n_v: - Int = Intrinsic::aarch64_neon_vsri; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n"); - case NEON::BI__builtin_neon_vsli_n_v: - case NEON::BI__builtin_neon_vsliq_n_v: - Int = Intrinsic::aarch64_neon_vsli; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n"); - case NEON::BI__builtin_neon_vqshrun_n_v: - Int = Intrinsic::aarch64_neon_vsqshrun; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); - case NEON::BI__builtin_neon_vrshrn_n_v: - Int = Intrinsic::aarch64_neon_vrshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); - case NEON::BI__builtin_neon_vqrshrun_n_v: - Int = Intrinsic::aarch64_neon_vsqrshrun; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); - case NEON::BI__builtin_neon_vqshrn_n_v: - Int = usgn ? Intrinsic::aarch64_neon_vuqshrn - : Intrinsic::aarch64_neon_vsqshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); - case NEON::BI__builtin_neon_vqrshrn_n_v: - Int = usgn ? Intrinsic::aarch64_neon_vuqrshrn - : Intrinsic::aarch64_neon_vsqrshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); - - // Convert - case NEON::BI__builtin_neon_vcvt_n_f64_v: - case NEON::BI__builtin_neon_vcvtq_n_f64_v: { - llvm::Type *FloatTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); - llvm::Type *Tys[2] = { FloatTy, Ty }; - Int = usgn ? 
Intrinsic::arm_neon_vcvtfxu2fp - : Intrinsic::arm_neon_vcvtfxs2fp; - Function *F = CGM.getIntrinsic(Int, Tys); - return EmitNeonCall(F, Ops, "vcvt_n"); - } - - // Load/Store - case NEON::BI__builtin_neon_vld1_x2_v: - case NEON::BI__builtin_neon_vld1q_x2_v: - case NEON::BI__builtin_neon_vld1_x3_v: - case NEON::BI__builtin_neon_vld1q_x3_v: - case NEON::BI__builtin_neon_vld1_x4_v: - case NEON::BI__builtin_neon_vld1q_x4_v: { - unsigned Int; - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld1_x2_v: - case NEON::BI__builtin_neon_vld1q_x2_v: - Int = Intrinsic::aarch64_neon_vld1x2; - break; - case NEON::BI__builtin_neon_vld1_x3_v: - case NEON::BI__builtin_neon_vld1q_x3_v: - Int = Intrinsic::aarch64_neon_vld1x3; - break; - case NEON::BI__builtin_neon_vld1_x4_v: - case NEON::BI__builtin_neon_vld1q_x4_v: - Int = Intrinsic::aarch64_neon_vld1x4; - break; - } - Function *F = CGM.getIntrinsic(Int, Ty); - Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld1xN"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vst1_x2_v: - case NEON::BI__builtin_neon_vst1q_x2_v: - case NEON::BI__builtin_neon_vst1_x3_v: - case NEON::BI__builtin_neon_vst1q_x3_v: - case NEON::BI__builtin_neon_vst1_x4_v: - case NEON::BI__builtin_neon_vst1q_x4_v: { - Ops.push_back(Align); - unsigned Int; - switch (BuiltinID) { - case NEON::BI__builtin_neon_vst1_x2_v: - case NEON::BI__builtin_neon_vst1q_x2_v: - Int = Intrinsic::aarch64_neon_vst1x2; - break; - case NEON::BI__builtin_neon_vst1_x3_v: - case NEON::BI__builtin_neon_vst1q_x3_v: - Int = Intrinsic::aarch64_neon_vst1x3; - break; - case NEON::BI__builtin_neon_vst1_x4_v: - case NEON::BI__builtin_neon_vst1q_x4_v: - Int = Intrinsic::aarch64_neon_vst1x4; - break; - } - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, ""); - } - case NEON::BI__builtin_neon_vld1_lane_v: - case NEON::BI__builtin_neon_vld1q_lane_v: { - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ty = llvm::PointerType::getUnqual(VTy->getElementType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - LoadInst *Ld = Builder.CreateLoad(Ops[0]); - Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); - return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); - } - case NEON::BI__builtin_neon_vst1_lane_v: - case NEON::BI__builtin_neon_vst1q_lane_v: { - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - StoreInst *St = - Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); - St->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); - return St; - } - case NEON::BI__builtin_neon_vld2_dup_v: - case NEON::BI__builtin_neon_vld2q_dup_v: - case NEON::BI__builtin_neon_vld3_dup_v: - case NEON::BI__builtin_neon_vld3q_dup_v: - case NEON::BI__builtin_neon_vld4_dup_v: - case NEON::BI__builtin_neon_vld4q_dup_v: { - // Handle 64-bit x 1 elements as a special-case. There is no "dup" needed. 
- if (VTy->getElementType()->getPrimitiveSizeInBits() == 64 && - VTy->getNumElements() == 1) { - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_dup_v: - Int = Intrinsic::arm_neon_vld2; - break; - case NEON::BI__builtin_neon_vld3_dup_v: - Int = Intrinsic::arm_neon_vld3; - break; - case NEON::BI__builtin_neon_vld4_dup_v: - Int = Intrinsic::arm_neon_vld4; - break; - default: - llvm_unreachable("unknown vld_dup intrinsic?"); - } - Function *F = CGM.getIntrinsic(Int, Ty); - Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_dup_v: - case NEON::BI__builtin_neon_vld2q_dup_v: - Int = Intrinsic::arm_neon_vld2lane; - break; - case NEON::BI__builtin_neon_vld3_dup_v: - case NEON::BI__builtin_neon_vld3q_dup_v: - Int = Intrinsic::arm_neon_vld3lane; - break; - case NEON::BI__builtin_neon_vld4_dup_v: - case NEON::BI__builtin_neon_vld4q_dup_v: - Int = Intrinsic::arm_neon_vld4lane; - break; - } - Function *F = CGM.getIntrinsic(Int, Ty); - llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); - - SmallVector<Value *, 6> Args; - Args.push_back(Ops[1]); - Args.append(STy->getNumElements(), UndefValue::get(Ty)); - - llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); - Args.push_back(CI); - Args.push_back(Align); - - Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); - // splat lane 0 to all elts in each vector of the result. - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Value *Val = Builder.CreateExtractValue(Ops[1], i); - Value *Elt = Builder.CreateBitCast(Val, Ty); - Elt = EmitNeonSplat(Elt, CI); - Elt = Builder.CreateBitCast(Elt, Val->getType()); - Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); - } - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - - case NEON::BI__builtin_neon_vmul_lane_v: - case NEON::BI__builtin_neon_vmul_laneq_v: { - // v1f64 vmul_lane should be mapped to Neon scalar mul lane - bool Quad = false; - if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) - Quad = true; - Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); - Ops[1] = Builder.CreateBitCast(Ops[1], VTy); - Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); - Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); - return Builder.CreateBitCast(Result, Ty); - } - - // AArch64-only builtins - case NEON::BI__builtin_neon_vfmaq_laneq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vfmaq_lane_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - - llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); - llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), - VTy->getNumElements() / 2); - Ops[2] = Builder.CreateBitCast(Ops[2], STy); - Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), - cast<ConstantInt>(Ops[3])); - Ops[2] = Builder.CreateShuffleVector(Ops[2], 
Ops[2], SV, "lane"); - - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vfma_lane_v: { - llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); - // v1f64 fma should be mapped to Neon scalar f64 fma - if (VTy && VTy->getElementType() == DoubleTy) { - Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, false)); - Ops[2] = Builder.CreateBitCast(Ops[2], VTy); - Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); - Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - return Builder.CreateBitCast(Result, Ty); - } - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vfma_laneq_v: { - llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); - // v1f64 fma should be mapped to Neon scalar f64 fma - if (VTy && VTy->getElementType() == DoubleTy) { - Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, true)); - Ops[2] = Builder.CreateBitCast(Ops[2], VTy); - Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); - Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - return Builder.CreateBitCast(Result, Ty); - } - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - - llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), - VTy->getNumElements() * 2); - Ops[2] = Builder.CreateBitCast(Ops[2], STy); - Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), - cast<ConstantInt>(Ops[3])); - Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); - - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); - } - case NEON::BI__builtin_neon_vfms_v: - case NEON::BI__builtin_neon_vfmsq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ops[1] = Builder.CreateFNeg(Ops[1]); - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - - // LLVM's fma intrinsic puts the accumulator in the last position, but the - // AArch64 intrinsic has it first. 
- return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - } - case NEON::BI__builtin_neon_vmaxnm_v: - case NEON::BI__builtin_neon_vmaxnmq_v: { - Int = Intrinsic::aarch64_neon_vmaxnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); - } - case NEON::BI__builtin_neon_vminnm_v: - case NEON::BI__builtin_neon_vminnmq_v: { - Int = Intrinsic::aarch64_neon_vminnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); - } - case NEON::BI__builtin_neon_vpmaxnm_v: - case NEON::BI__builtin_neon_vpmaxnmq_v: { - Int = Intrinsic::aarch64_neon_vpmaxnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); - } - case NEON::BI__builtin_neon_vpminnm_v: - case NEON::BI__builtin_neon_vpminnmq_v: { - Int = Intrinsic::aarch64_neon_vpminnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); - } - case NEON::BI__builtin_neon_vpmaxq_v: { - Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); - } - case NEON::BI__builtin_neon_vpminq_v: { - Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); - } - case NEON::BI__builtin_neon_vmulx_v: - case NEON::BI__builtin_neon_vmulxq_v: { - Int = Intrinsic::aarch64_neon_vmulx; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); - } - case NEON::BI__builtin_neon_vsqadd_v: - case NEON::BI__builtin_neon_vsqaddq_v: { - Int = Intrinsic::aarch64_neon_usqadd; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); - } - case NEON::BI__builtin_neon_vuqadd_v: - case NEON::BI__builtin_neon_vuqaddq_v: { - Int = Intrinsic::aarch64_neon_suqadd; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); - } - case NEON::BI__builtin_neon_vrbit_v: - case NEON::BI__builtin_neon_vrbitq_v: - Int = Intrinsic::aarch64_neon_rbit; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); - case NEON::BI__builtin_neon_vcvt_f32_f64: { - NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); - Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); - return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); - } - case NEON::BI__builtin_neon_vcvtx_f32_v: { - llvm::Type *EltTy = FloatTy; - llvm::Type *ResTy = llvm::VectorType::get(EltTy, 2); - llvm::Type *Tys[2] = { ResTy, Ty }; - Int = Intrinsic::aarch64_neon_vcvtxn; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtx_f32_f64"); - } - case NEON::BI__builtin_neon_vcvt_f64_f32: { - llvm::Type *OpTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false)); - Ops[0] = Builder.CreateBitCast(Ops[0], OpTy); - return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); - } - case NEON::BI__builtin_neon_vcvt_f64_v: - case NEON::BI__builtin_neon_vcvtq_f64_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); - return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") - : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); - } - case NEON::BI__builtin_neon_vrndn_v: - case NEON::BI__builtin_neon_vrndnq_v: { - Int = Intrinsic::aarch64_neon_frintn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); - } - case NEON::BI__builtin_neon_vrnda_v: - case NEON::BI__builtin_neon_vrndaq_v: { - Int = Intrinsic::round; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); - } - case NEON::BI__builtin_neon_vrndp_v: - case NEON::BI__builtin_neon_vrndpq_v: { - Int = Intrinsic::ceil; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); - } - case NEON::BI__builtin_neon_vrndm_v: - case NEON::BI__builtin_neon_vrndmq_v: { - Int = Intrinsic::floor; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); - } - case NEON::BI__builtin_neon_vrndx_v: - case NEON::BI__builtin_neon_vrndxq_v: { - Int = Intrinsic::rint; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); - } - case NEON::BI__builtin_neon_vrnd_v: - case NEON::BI__builtin_neon_vrndq_v: { - Int = Intrinsic::trunc; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd"); - } - case NEON::BI__builtin_neon_vrndi_v: - case NEON::BI__builtin_neon_vrndiq_v: { - Int = Intrinsic::nearbyint; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); - } - case NEON::BI__builtin_neon_vsqrt_v: - case NEON::BI__builtin_neon_vsqrtq_v: { - Int = Intrinsic::sqrt; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); - } - case NEON::BI__builtin_neon_vceqz_v: - case NEON::BI__builtin_neon_vceqzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, - ICmpInst::ICMP_EQ, "vceqz"); - case NEON::BI__builtin_neon_vcgez_v: - case NEON::BI__builtin_neon_vcgezq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, - ICmpInst::ICMP_SGE, "vcgez"); - case NEON::BI__builtin_neon_vclez_v: - case NEON::BI__builtin_neon_vclezq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, - ICmpInst::ICMP_SLE, "vclez"); - case NEON::BI__builtin_neon_vcgtz_v: - case NEON::BI__builtin_neon_vcgtzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, - ICmpInst::ICMP_SGT, "vcgtz"); - case NEON::BI__builtin_neon_vcltz_v: - case NEON::BI__builtin_neon_vcltzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, - ICmpInst::ICMP_SLT, "vcltz"); - } -} - Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { unsigned HintID = static_cast<unsigned>(-1); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 944a0cf13b0..74433392e15 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2201,8 +2201,6 @@ public: const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name = ""); - llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty); - llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 52e41322de0..88c4d96c1d3 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -4512,221 +4512,6 @@ llvm::Value *NaClARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, } 
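A note for readers following the builtin-emission code removed above, before the ABI changes that follow: every overloaded "_v" NEON builtin carries its element type, signedness, and vector width in a trailing integer-constant argument, which the emitter decodes through NeonTypeFlags before selecting an intrinsic (the `usgn`/`quad` flags that drive nearly every switch arm). A minimal sketch of that decoding step, assuming the usual flag layout (low nibble = element type, 0x10 = unsigned, 0x20 = quad/128-bit) rather than quoting this revision:

    #include <cstdint>

    // Hypothetical stand-in for clang's NeonTypeFlags; the mask values are an
    // assumption for illustration, not copied from the tree at this revision.
    struct NeonTypeFlagsSketch {
      enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 };
      uint32_t Flags;

      unsigned eltType()    const { return Flags & EltTypeMask; }          // Int8 ... Float64
      bool     isUnsigned() const { return (Flags & UnsignedFlag) != 0; }  // e.g. vurshr vs. vsrshr
      bool     isQuad()     const { return (Flags & QuadFlag) != 0; }      // 64-bit vs. 128-bit vector
    };

These three queries are what the removed switch keys off: for example, vrshr_n picks the unsigned rounding-shift intrinsic when isUnsigned() is set and the signed one otherwise, while isQuad() selects the 64-bit or 128-bit vector type returned by GetNeonType.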
//===----------------------------------------------------------------------===// -// AArch64 ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { - -class AArch64ABIInfo : public ABIInfo { -public: - AArch64ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} - -private: - // The AArch64 PCS is explicit about return types and argument types being - // handled identically, so we don't need to draw a distinction between - // Argument and Return classification. - ABIArgInfo classifyGenericType(QualType Ty, int &FreeIntRegs, - int &FreeVFPRegs) const; - - ABIArgInfo tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, bool IsInt, - llvm::Type *DirectTy = nullptr) const; - - void computeInfo(CGFunctionInfo &FI) const override; - - llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const override; -}; - -class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { -public: - AArch64TargetCodeGenInfo(CodeGenTypes &CGT) - :TargetCodeGenInfo(new AArch64ABIInfo(CGT)) {} - - const AArch64ABIInfo &getABIInfo() const { - return static_cast<const AArch64ABIInfo&>(TargetCodeGenInfo::getABIInfo()); - } - - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { - return 31; - } - - bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override { - // 0-31 are x0-x30 and sp: 8 bytes each - llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); - AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 31); - - // 64-95 are v0-v31: 16 bytes each - llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16); - AssignToArrayRange(CGF.Builder, Address, Sixteen8, 64, 95); - - return false; - } - -}; - -} - -void AArch64ABIInfo::computeInfo(CGFunctionInfo &FI) const { - int FreeIntRegs = 8, FreeVFPRegs = 8; - - FI.getReturnInfo() = classifyGenericType(FI.getReturnType(), - FreeIntRegs, FreeVFPRegs); - - FreeIntRegs = FreeVFPRegs = 8; - for (auto &I : FI.arguments()) { - I.info = classifyGenericType(I.type, FreeIntRegs, FreeVFPRegs); - - } -} - -ABIArgInfo -AArch64ABIInfo::tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, - bool IsInt, llvm::Type *DirectTy) const { - if (FreeRegs >= RegsNeeded) { - FreeRegs -= RegsNeeded; - return ABIArgInfo::getDirect(DirectTy); - } - - llvm::Type *Padding = nullptr; - - // We need padding so that later arguments don't get filled in anyway. That - // wouldn't happen if only ByVal arguments followed in the same category, but - // a large structure will simply seem to be a pointer as far as LLVM is - // concerned. - if (FreeRegs > 0) { - if (IsInt) - Padding = llvm::Type::getInt64Ty(getVMContext()); - else - Padding = llvm::Type::getFloatTy(getVMContext()); - - // Either [N x i64] or [N x float]. - Padding = llvm::ArrayType::get(Padding, FreeRegs); - FreeRegs = 0; - } - - return ABIArgInfo::getIndirect(getContext().getTypeAlign(Ty) / 8, - /*IsByVal=*/ true, /*Realign=*/ false, - Padding); -} - - -ABIArgInfo AArch64ABIInfo::classifyGenericType(QualType Ty, - int &FreeIntRegs, - int &FreeVFPRegs) const { - // Can only occurs for return, but harmless otherwise. - if (Ty->isVoidType()) - return ABIArgInfo::getIgnore(); - - // Large vector types should be returned via memory. There's no such concept - // in the ABI, but they'd be over 16 bytes anyway so no matter how they're - // classified they'd go into memory (see B.3). 
- if (Ty->isVectorType() && getContext().getTypeSize(Ty) > 128) { - if (FreeIntRegs > 0) - --FreeIntRegs; - return ABIArgInfo::getIndirect(0, /*ByVal=*/false); - } - - // All non-aggregate LLVM types have a concrete ABI representation so they can - // be passed directly. After this block we're guaranteed to be in a - // complicated case. - if (!isAggregateTypeForABI(Ty)) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - if (Ty->isFloatingType() || Ty->isVectorType()) - return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ false); - - assert(getContext().getTypeSize(Ty) <= 128 && - "unexpectedly large scalar type"); - - int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1; - - // If the type may need padding registers to ensure "alignment", we must be - // careful when this is accounted for. Increasing the effective size covers - // all cases. - if (getContext().getTypeAlign(Ty) == 128) - RegsNeeded += FreeIntRegs % 2 != 0; - - return tryUseRegs(Ty, FreeIntRegs, RegsNeeded, /*IsInt=*/ true); - } - - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { - if (FreeIntRegs > 0 && RAA == CGCXXABI::RAA_Indirect) - --FreeIntRegs; - return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory); - } - - if (isEmptyRecord(getContext(), Ty, true)) { - if (!getContext().getLangOpts().CPlusPlus) { - // Empty structs outside C++ mode are a GNU extension, so no ABI can - // possibly tell us what to do. It turns out (I believe) that GCC ignores - // the object for parameter-passsing purposes. - return ABIArgInfo::getIgnore(); - } - - // The combination of C++98 9p5 (sizeof(struct) != 0) and the pseudocode - // description of va_arg in the PCS require that an empty struct does - // actually occupy space for parameter-passing. I'm hoping for a - // clarification giving an explicit paragraph to point to in future. - return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ true, - llvm::Type::getInt8Ty(getVMContext())); - } - - // Homogeneous vector aggregates get passed in registers or on the stack. - const Type *Base = nullptr; - uint64_t NumMembers = 0; - if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers)) { - assert(Base && "Base class should be set for homogeneous aggregate"); - // Homogeneous aggregates are passed and returned directly. - return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ NumMembers, - /*IsInt=*/ false); - } - - uint64_t Size = getContext().getTypeSize(Ty); - if (Size <= 128) { - // Small structs can use the same direct type whether they're in registers - // or on the stack. - llvm::Type *BaseTy; - unsigned NumBases; - int SizeInRegs = (Size + 63) / 64; - - if (getContext().getTypeAlign(Ty) == 128) { - BaseTy = llvm::Type::getIntNTy(getVMContext(), 128); - NumBases = 1; - - // If the type may need padding registers to ensure "alignment", we must - // be careful when this is accounted for. Increasing the effective size - // covers all cases. - SizeInRegs += FreeIntRegs % 2 != 0; - } else { - BaseTy = llvm::Type::getInt64Ty(getVMContext()); - NumBases = SizeInRegs; - } - llvm::Type *DirectTy = llvm::ArrayType::get(BaseTy, NumBases); - - return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ SizeInRegs, - /*IsInt=*/ true, DirectTy); - } - - // If the aggregate is > 16 bytes, it's passed and returned indirectly. In - // LLVM terms the return uses an "sret" pointer, but that's handled elsewhere. 
- --FreeIntRegs; - return ABIArgInfo::getIndirect(0, /* byVal = */ false); -} - -llvm::Value *AArch64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const { - int FreeIntRegs = 8, FreeVFPRegs = 8; - Ty = CGF.getContext().getCanonicalType(Ty); - ABIArgInfo AI = classifyGenericType(Ty, FreeIntRegs, FreeVFPRegs); - - return EmitAArch64VAArg(VAListAddr, Ty, 8 - FreeIntRegs, 8 - FreeVFPRegs, - AI.isIndirect(), CGF); -} - -//===----------------------------------------------------------------------===// // NVPTX ABI Implementation //===----------------------------------------------------------------------===// @@ -6684,6 +6469,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::mips64el: return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false)); + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: case llvm::Triple::arm64: case llvm::Triple::arm64_be: { ARM64ABIInfo::ABIKind Kind = ARM64ABIInfo::AAPCS; @@ -6693,10 +6480,6 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return *(TheTargetCodeGenInfo = new ARM64TargetCodeGenInfo(Types, Kind)); } - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types)); - case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: diff --git a/clang/lib/Driver/Tools.cpp b/clang/lib/Driver/Tools.cpp index 7670daa1713..4c097c5120a 100644 --- a/clang/lib/Driver/Tools.cpp +++ b/clang/lib/Driver/Tools.cpp @@ -444,26 +444,6 @@ void Clang::AddPreprocessingOptions(Compilation &C, getToolChain().AddClangSystemIncludeArgs(Args, CmdArgs); } -/// getAArch64TargetCPU - Get the (LLVM) name of the AArch64 cpu we are targeting. -// -// FIXME: tblgen this. -static std::string getAArch64TargetCPU(const ArgList &Args, - const llvm::Triple &Triple) { - // FIXME: Warn on inconsistent use of -mcpu and -march. - - // If we have -mcpu=, use that. - if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { - StringRef MCPU = A->getValue(); - // Handle -mcpu=native. - if (MCPU == "native") - return llvm::sys::getHostCPUName(); - else - return MCPU; - } - - return "generic"; -} - // FIXME: Move to target hook. 
static bool isSignedCharDefault(const llvm::Triple &Triple) { switch (Triple.getArch()) { @@ -1345,8 +1325,6 @@ static std::string getCPUName(const ArgList &Args, const llvm::Triple &T) { case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: - return getAArch64TargetCPU(Args, T); - case llvm::Triple::arm64: case llvm::Triple::arm64_be: return getARM64TargetCPU(Args); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 7974b6f443e..78ba66bfb9b 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -309,16 +309,13 @@ Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (CheckARMBuiltinFunctionCall(BuiltinID, TheCall)) return ExprError(); break; + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: case llvm::Triple::arm64: case llvm::Triple::arm64_be: if (CheckARM64BuiltinFunctionCall(BuiltinID, TheCall)) return ExprError(); break; - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - if (CheckAArch64BuiltinFunctionCall(BuiltinID, TheCall)) - return ExprError(); - break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: @@ -472,14 +469,6 @@ bool Sema::CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return SemaBuiltinConstantArgRange(TheCall, i, l, u + l); } -bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, - CallExpr *TheCall) { - if (CheckNeonBuiltinFunctionCall(BuiltinID, TheCall)) - return true; - - return false; -} - bool Sema::CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall, unsigned MaxWidth) { assert((BuiltinID == ARM::BI__builtin_arm_ldrex || |
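For context on the SemaChecking.cpp hunk above: with CheckAArch64BuiltinFunctionCall removed, aarch64 builtins now reach CheckARM64BuiltinFunctionCall, whose NEON path reduces to verifying that each immediate operand is an integer constant inside a table-derived range (the SemaBuiltinConstantArgRange call visible earlier in this hunk). A standalone sketch of that kind of check, with the argument value and bounds invented purely for illustration:

    #include <cassert>
    #include <cstdint>

    // Minimal sketch (not clang's API) of the range check that
    // SemaBuiltinConstantArgRange applies to NEON lane/shift immediates.
    // Returns true on error, matching the convention of Sema's Check* helpers.
    static bool constantArgOutOfRange(int64_t Value, int64_t Low, int64_t High) {
      return Value < Low || Value > High;
    }

    int main() {
      // Example: a lane index into a 4-lane vector must lie in [0, 3]; the
      // concrete bounds here come from the builtin's type in general, not
      // from any table in this patch.
      assert(!constantArgOutOfRange(3, 0, 3));
      assert(constantArgOutOfRange(4, 0, 3));
      return 0;
    }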