Diffstat (limited to 'clang/lib')
-rw-r--r--  clang/lib/AST/ItaniumMangle.cpp      |    8
-rw-r--r--  clang/lib/Basic/Targets.cpp          |  315
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp      | 1307
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.h  |    2
-rw-r--r--  clang/lib/CodeGen/TargetInfo.cpp     |  221
-rw-r--r--  clang/lib/Driver/Tools.cpp           |   22
-rw-r--r--  clang/lib/Sema/SemaChecking.cpp      |   15
7 files changed, 32 insertions(+), 1858 deletions(-)
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index f1a16b50337..58a44b7645a 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -2300,10 +2300,10 @@ void CXXNameMangler::mangleType(const VectorType *T) {
llvm::Triple Target = getASTContext().getTargetInfo().getTriple();
llvm::Triple::ArchType Arch =
getASTContext().getTargetInfo().getTriple().getArch();
- if (Arch == llvm::Triple::aarch64 ||
- Arch == llvm::Triple::aarch64_be ||
- Arch == llvm::Triple::arm64_be ||
- (Arch == llvm::Triple::arm64 && !Target.isOSDarwin()))
+ if ((Arch == llvm::Triple::aarch64 ||
+ Arch == llvm::Triple::aarch64_be ||
+ Arch == llvm::Triple::arm64_be ||
+ Arch == llvm::Triple::arm64) && !Target.isOSDarwin())
mangleAArch64NeonVectorType(T);
else
mangleNeonVectorType(T);
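
The regrouped parentheses above are a semantic fix, not reformatting: with the old grouping, !Target.isOSDarwin() bound only to the arm64 case, so an aarch64, aarch64_be, or arm64_be Darwin triple would still take the AArch64 NEON mangling path; the new grouping excludes Darwin for all four arches. A minimal standalone sketch of the difference (variable names illustrative, not from the patch):

    #include <cstdio>

    int main() {
      // Hypothetical arm64_be Darwin triple.
      bool aarch64 = false, aarch64_be = false, arm64_be = true, arm64 = false;
      bool darwin = true;

      bool oldCond = aarch64 || aarch64_be || arm64_be || (arm64 && !darwin);
      bool newCond = (aarch64 || aarch64_be || arm64_be || arm64) && !darwin;

      // The old grouping still selects AArch64 mangling; the new one does not.
      std::printf("old=%d new=%d\n", oldCond, newCond);  // prints old=1 new=0
    }
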
diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp
index 62d44be9deb..82d79f7a153 100644
--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@@ -3408,289 +3408,6 @@ public:
};
}
-namespace {
-class AArch64TargetInfo : public TargetInfo {
- virtual void setDescriptionString() = 0;
- static const char * const GCCRegNames[];
- static const TargetInfo::GCCRegAlias GCCRegAliases[];
-
- enum FPUModeEnum {
- FPUMode,
- NeonMode
- };
-
- unsigned FPU;
- unsigned CRC;
- unsigned Crypto;
- static const Builtin::Info BuiltinInfo[];
-
-public:
- AArch64TargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
- LongWidth = LongAlign = 64;
- LongDoubleWidth = LongDoubleAlign = 128;
- PointerWidth = PointerAlign = 64;
- SuitableAlign = 128;
-
- WCharType = UnsignedInt;
- if (getTriple().getOS() == llvm::Triple::NetBSD) {
- WCharType = SignedInt;
- Int64Type = SignedLongLong;
- IntMaxType = SignedLongLong;
- UIntMaxType = UnsignedLongLong;
- } else {
- WCharType = UnsignedInt;
- Int64Type = SignedLong;
- IntMaxType = SignedLong;
- UIntMaxType = UnsignedLong;
- }
- LongDoubleFormat = &llvm::APFloat::IEEEquad;
-
- // AArch64 backend supports 64-bit operations at the moment. In principle
- // 128-bit is possible if register-pairs are used.
- MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
-
- TheCXXABI.set(TargetCXXABI::GenericAArch64);
- }
- void getTargetDefines(const LangOptions &Opts,
- MacroBuilder &Builder) const override {
-    // GCC defines these currently
- Builder.defineMacro("__aarch64__");
-
- // ACLE predefines. Many can only have one possible value on v8 AArch64.
- Builder.defineMacro("__ARM_ACLE", "200");
- Builder.defineMacro("__ARM_ARCH", "8");
- Builder.defineMacro("__ARM_ARCH_PROFILE", "'A'");
-
- Builder.defineMacro("__ARM_64BIT_STATE");
- Builder.defineMacro("__ARM_PCS_AAPCS64");
- Builder.defineMacro("__ARM_ARCH_ISA_A64");
-
- Builder.defineMacro("__ARM_FEATURE_UNALIGNED");
- Builder.defineMacro("__ARM_FEATURE_CLZ");
- Builder.defineMacro("__ARM_FEATURE_FMA");
- Builder.defineMacro("__ARM_FEATURE_DIV");
-
- Builder.defineMacro("__ARM_ALIGN_MAX_STACK_PWR", "4");
-
- // 0xe implies support for half, single and double precision operations.
- Builder.defineMacro("__ARM_FP", "0xe");
-
- // PCS specifies this for SysV variants, which is all we support. Other ABIs
- // may choose __ARM_FP16_FORMAT_ALTERNATIVE.
- Builder.defineMacro("__ARM_FP16_FORMAT_IEEE");
-
- if (Opts.FastMath || Opts.FiniteMathOnly)
- Builder.defineMacro("__ARM_FP_FAST");
-
- if ((Opts.C99 || Opts.C11) && !Opts.Freestanding)
- Builder.defineMacro("__ARM_FP_FENV_ROUNDING");
-
- Builder.defineMacro("__ARM_SIZEOF_WCHAR_T",
- Opts.ShortWChar ? "2" : "4");
-
- Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM",
- Opts.ShortEnums ? "1" : "4");
-
- if (FPU == NeonMode) {
- Builder.defineMacro("__ARM_NEON");
- // 64-bit NEON supports half, single and double precision operations.
- Builder.defineMacro("__ARM_NEON_FP", "0xe");
- }
-
- if (CRC)
- Builder.defineMacro("__ARM_FEATURE_CRC32");
-
- if (Crypto) {
- Builder.defineMacro("__ARM_FEATURE_CRYPTO");
- }
- }
- void getTargetBuiltins(const Builtin::Info *&Records,
- unsigned &NumRecords) const override {
- Records = BuiltinInfo;
- NumRecords = clang::AArch64::LastTSBuiltin-Builtin::FirstTSBuiltin;
- }
- bool hasFeature(StringRef Feature) const override {
- return Feature == "aarch64" || (Feature == "neon" && FPU == NeonMode);
- }
-
- bool setCPU(const std::string &Name) override {
- return llvm::StringSwitch<bool>(Name)
- .Case("generic", true)
- .Cases("cortex-a53", "cortex-a57", true)
- .Default(false);
- }
-
- bool handleTargetFeatures(std::vector<std::string> &Features,
- DiagnosticsEngine &Diags) override {
- FPU = FPUMode;
- CRC = 0;
- Crypto = 0;
- for (unsigned i = 0, e = Features.size(); i != e; ++i) {
- if (Features[i] == "+neon")
- FPU = NeonMode;
- if (Features[i] == "+crc")
- CRC = 1;
- if (Features[i] == "+crypto")
- Crypto = 1;
- }
-
- setDescriptionString();
-
- return true;
- }
-
- void getGCCRegNames(const char *const *&Names,
- unsigned &NumNames) const override;
- void getGCCRegAliases(const GCCRegAlias *&Aliases,
- unsigned &NumAliases) const override;
-
- bool isCLZForZeroUndef() const override { return false; }
-
- bool validateAsmConstraint(const char *&Name,
- TargetInfo::ConstraintInfo &Info) const override {
- switch (*Name) {
- default: return false;
- case 'w': // An FP/SIMD vector register
- Info.setAllowsRegister();
- return true;
- case 'I': // Constant that can be used with an ADD instruction
- case 'J': // Constant that can be used with a SUB instruction
- case 'K': // Constant that can be used with a 32-bit logical instruction
- case 'L': // Constant that can be used with a 64-bit logical instruction
- case 'M': // Constant that can be used as a 32-bit MOV immediate
- case 'N': // Constant that can be used as a 64-bit MOV immediate
- case 'Y': // Floating point constant zero
- case 'Z': // Integer constant zero
- return true;
- case 'Q': // A memory reference with base register and no offset
- Info.setAllowsMemory();
- return true;
- case 'S': // A symbolic address
- Info.setAllowsRegister();
- return true;
- case 'U':
- // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, whatever they may be
- // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
- // Usa: An absolute symbolic address
- // Ush: The high part (bits 32:12) of a pc-relative symbolic address
- llvm_unreachable("FIXME: Unimplemented support for bizarre constraints");
- }
- }
-
- const char *getClobbers() const override {
- // There are no AArch64 clobbers shared by all asm statements.
- return "";
- }
-
- BuiltinVaListKind getBuiltinVaListKind() const override {
- return TargetInfo::AArch64ABIBuiltinVaList;
- }
-};
-
-const char * const AArch64TargetInfo::GCCRegNames[] = {
- "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
- "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
- "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
- "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wsp", "wzr",
-
- "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
- "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
- "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
- "x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp", "xzr",
-
- "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
- "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
- "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
- "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31",
-
- "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
- "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
- "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
- "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31",
-
- "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
- "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
- "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
- "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
-
- "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
- "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
- "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
- "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
-
- "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
- "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
- "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31"
-};
-
-void AArch64TargetInfo::getGCCRegNames(const char * const *&Names,
- unsigned &NumNames) const {
- Names = GCCRegNames;
- NumNames = llvm::array_lengthof(GCCRegNames);
-}
-
-const TargetInfo::GCCRegAlias AArch64TargetInfo::GCCRegAliases[] = {
- { { "x16" }, "ip0"},
- { { "x17" }, "ip1"},
- { { "x29" }, "fp" },
- { { "x30" }, "lr" }
-};
-
-void AArch64TargetInfo::getGCCRegAliases(const GCCRegAlias *&Aliases,
- unsigned &NumAliases) const {
- Aliases = GCCRegAliases;
- NumAliases = llvm::array_lengthof(GCCRegAliases);
-
-}
-
-const Builtin::Info AArch64TargetInfo::BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES },
-#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\
- ALL_LANGUAGES },
-#include "clang/Basic/BuiltinsNEON.def"
-
-#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES },
-#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\
- ALL_LANGUAGES },
-#include "clang/Basic/BuiltinsAArch64.def"
-};
-
-class AArch64leTargetInfo : public AArch64TargetInfo {
- void setDescriptionString() override {
- DescriptionString = "e-m:e-i64:64-i128:128-n32:64-S128";
- }
-
-public:
- AArch64leTargetInfo(const llvm::Triple &Triple)
- : AArch64TargetInfo(Triple) {
- BigEndian = false;
- }
- void getTargetDefines(const LangOptions &Opts,
- MacroBuilder &Builder) const override {
- Builder.defineMacro("__AARCH64EL__");
- AArch64TargetInfo::getTargetDefines(Opts, Builder);
- }
-};
-
-class AArch64beTargetInfo : public AArch64TargetInfo {
- void setDescriptionString() override {
- DescriptionString = "E-m:e-i64:64-i128:128-n32:64-S128";
- }
-
-public:
- AArch64beTargetInfo(const llvm::Triple &Triple)
- : AArch64TargetInfo(Triple) { }
- void getTargetDefines(const LangOptions &Opts,
- MacroBuilder &Builder) const override {
- Builder.defineMacro("__AARCH64EB__");
- Builder.defineMacro("__AARCH_BIG_ENDIAN");
- Builder.defineMacro("__ARM_BIG_ENDIAN");
- AArch64TargetInfo::getTargetDefines(Opts, Builder);
- }
-};
-
-} // end anonymous namespace
namespace {
class ARMTargetInfo : public TargetInfo {
@@ -4537,11 +4254,23 @@ class ARM64TargetInfo : public TargetInfo {
public:
ARM64TargetInfo(const llvm::Triple &Triple)
: TargetInfo(Triple), ABI("aapcs") {
+
+ if (getTriple().getOS() == llvm::Triple::NetBSD) {
+ WCharType = SignedInt;
+
+ // NetBSD apparently prefers consistency across ARM targets to consistency
+ // across 64-bit targets.
+ Int64Type = SignedLongLong;
+ IntMaxType = SignedLongLong;
+ UIntMaxType = UnsignedLongLong;
+ } else {
+ WCharType = UnsignedInt;
+ Int64Type = SignedLong;
+ IntMaxType = SignedLong;
+ UIntMaxType = UnsignedLong;
+ }
+
LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
- IntMaxType = SignedLong;
- UIntMaxType = UnsignedLong;
- Int64Type = SignedLong;
- WCharType = UnsignedInt;
MaxVectorAlign = 128;
RegParmMax = 8;
MaxAtomicInlineWidth = 128;
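
The OS-dependent Int64Type chosen above is observable from user code, since it decides whether int64_t is a typedef of long or of long long on an LP64 target. A minimal sketch of one visible consequence, Itanium name mangling (hypothetical function name, not from the patch):

    #include <cstdint>

    // With Int64Type = SignedLong (the non-NetBSD branch), int64_t is long
    // and this declaration mangles as _Z1fl; with Int64Type = SignedLongLong
    // (the NetBSD branch), int64_t is long long and it mangles as _Z1fx.
    void f(int64_t);
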
@@ -6218,21 +5947,21 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple) {
case llvm::Triple::aarch64:
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<AArch64leTargetInfo>(Triple);
+ return new LinuxTargetInfo<ARM64leTargetInfo>(Triple);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<AArch64leTargetInfo>(Triple);
+ return new NetBSDTargetInfo<ARM64leTargetInfo>(Triple);
default:
- return new AArch64leTargetInfo(Triple);
+ return new ARM64leTargetInfo(Triple);
}
case llvm::Triple::aarch64_be:
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<AArch64beTargetInfo>(Triple);
+ return new LinuxTargetInfo<ARM64beTargetInfo>(Triple);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<AArch64beTargetInfo>(Triple);
+ return new NetBSDTargetInfo<ARM64beTargetInfo>(Triple);
default:
- return new AArch64beTargetInfo(Triple);
+ return new ARM64beTargetInfo(Triple);
}
case llvm::Triple::arm:
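
The AllocateTarget hunk above keeps the same layering as before and only swaps in the ARM64-based classes: an OS template such as LinuxTargetInfo stacks OS-specific behavior on top of a CPU-specific TargetInfo. A reduced, compilable sketch of that wrapper pattern (all names hypothetical stand-ins for the clang classes):

    #include <string>

    struct TargetInfoSketch {
      explicit TargetInfoSketch(const std::string & /*Triple*/) {}
      virtual ~TargetInfoSketch() = default;
    };

    // CPU layer: stand-in for ARM64leTargetInfo.
    struct ARM64leSketch : TargetInfoSketch {
      using TargetInfoSketch::TargetInfoSketch;
    };

    // OS layer stacked on via a class template, mirroring
    // LinuxTargetInfo<ARM64leTargetInfo> in the hunk above.
    template <typename Target> struct LinuxSketch : Target {
      using Target::Target;
      // OS-specific predefines would be layered in here.
    };

    TargetInfoSketch *allocate(const std::string &Triple, bool IsLinux) {
      return IsLinux ? new LinuxSketch<ARM64leSketch>(Triple)
                     : new ARM64leSketch(Triple);
    }
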
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9d692d8e1e2..585db1778bf 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1637,14 +1637,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
switch (getTarget().getTriple().getArch()) {
- case llvm::Triple::aarch64:
- case llvm::Triple::aarch64_be:
- return EmitAArch64BuiltinExpr(BuiltinID, E);
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
return EmitARMBuiltinExpr(BuiltinID, E);
+ case llvm::Triple::aarch64:
+ case llvm::Triple::aarch64_be:
case llvm::Triple::arm64:
case llvm::Triple::arm64_be:
return EmitARM64BuiltinExpr(BuiltinID, E);
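
After this hunk, a NEON builtin compiled for any of the four 64-bit triples reaches EmitARM64BuiltinExpr; the separate AArch64 path is gone. For reference, a user-level intrinsic that exercises this route (a standard ACLE intrinsic; compiles only when targeting AArch64):

    #include <arm_neon.h>

    // vaddvq_s32 expands to a __builtin_neon_* call, which the switch above
    // now dispatches to EmitARM64BuiltinExpr on aarch64 triples.
    int32_t horizontal_add(int32x4_t v) { return vaddvq_s32(v); }
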
@@ -1883,354 +1882,6 @@ enum {
Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
#NameBase, TypeModifier }
-static const NeonIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
- NEONMAP1(vabdd_f64, aarch64_neon_vabd, AddRetType),
- NEONMAP1(vabds_f32, aarch64_neon_vabd, AddRetType),
- NEONMAP1(vabsd_s64, aarch64_neon_vabs, 0),
- NEONMAP1(vaddd_s64, aarch64_neon_vaddds, 0),
- NEONMAP1(vaddd_u64, aarch64_neon_vadddu, 0),
- NEONMAP1(vaddlv_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
- NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
- NEONMAP1(vaddlv_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
- NEONMAP1(vaddlv_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
- NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
- NEONMAP1(vaddlv_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
- NEONMAP1(vaddlvq_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
- NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
- NEONMAP1(vaddlvq_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
- NEONMAP1(vaddlvq_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
- NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
- NEONMAP1(vaddlvq_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
- NEONMAP1(vaddv_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
- NEONMAP1(vaddv_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vaddv_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vaddv_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vaddv_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vaddv_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vaddv_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vaddvq_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vaddvq_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vaddvq_s64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vaddvq_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vaddvq_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vaddvq_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vaddvq_u64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vaddvq_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- NEONMAP1(vcaged_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
- NEONMAP1(vcages_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
- NEONMAP1(vcagtd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
- NEONMAP1(vcagts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
- NEONMAP1(vcaled_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
- NEONMAP1(vcales_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
- NEONMAP1(vcaltd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
- NEONMAP1(vcalts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
- NEONMAP1(vceqd_f64, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
- NEONMAP1(vceqd_s64, aarch64_neon_vceq, VectorRetGetArgs01),
- NEONMAP1(vceqd_u64, aarch64_neon_vceq, VectorRetGetArgs01),
- NEONMAP1(vceqs_f32, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
- NEONMAP1(vceqzd_f64, aarch64_neon_fceq, FpCmpzModifiers),
- NEONMAP1(vceqzd_s64, aarch64_neon_vceq, VectorRetGetArgs01),
- NEONMAP1(vceqzd_u64, aarch64_neon_vceq, VectorRetGetArgs01),
- NEONMAP1(vceqzs_f32, aarch64_neon_fceq, FpCmpzModifiers),
- NEONMAP1(vcged_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
- NEONMAP1(vcged_s64, aarch64_neon_vcge, VectorRetGetArgs01),
- NEONMAP1(vcged_u64, aarch64_neon_vchs, VectorRetGetArgs01),
- NEONMAP1(vcges_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
- NEONMAP1(vcgezd_f64, aarch64_neon_fcge, FpCmpzModifiers),
- NEONMAP1(vcgezd_s64, aarch64_neon_vcge, VectorRetGetArgs01),
- NEONMAP1(vcgezs_f32, aarch64_neon_fcge, FpCmpzModifiers),
- NEONMAP1(vcgtd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
- NEONMAP1(vcgtd_s64, aarch64_neon_vcgt, VectorRetGetArgs01),
- NEONMAP1(vcgtd_u64, aarch64_neon_vchi, VectorRetGetArgs01),
- NEONMAP1(vcgts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
- NEONMAP1(vcgtzd_f64, aarch64_neon_fcgt, FpCmpzModifiers),
- NEONMAP1(vcgtzd_s64, aarch64_neon_vcgt, VectorRetGetArgs01),
- NEONMAP1(vcgtzs_f32, aarch64_neon_fcgt, FpCmpzModifiers),
- NEONMAP1(vcled_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
- NEONMAP1(vcled_s64, aarch64_neon_vcge, VectorRetGetArgs01),
- NEONMAP1(vcled_u64, aarch64_neon_vchs, VectorRetGetArgs01),
- NEONMAP1(vcles_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
- NEONMAP1(vclezd_f64, aarch64_neon_fclez, FpCmpzModifiers),
- NEONMAP1(vclezd_s64, aarch64_neon_vclez, VectorRetGetArgs01),
- NEONMAP1(vclezs_f32, aarch64_neon_fclez, FpCmpzModifiers),
- NEONMAP1(vcltd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
- NEONMAP1(vcltd_s64, aarch64_neon_vcgt, VectorRetGetArgs01),
- NEONMAP1(vcltd_u64, aarch64_neon_vchi, VectorRetGetArgs01),
- NEONMAP1(vclts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
- NEONMAP1(vcltzd_f64, aarch64_neon_fcltz, FpCmpzModifiers),
- NEONMAP1(vcltzd_s64, aarch64_neon_vcltz, VectorRetGetArgs01),
- NEONMAP1(vcltzs_f32, aarch64_neon_fcltz, FpCmpzModifiers),
- NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
- NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
- NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
- NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
- NEONMAP1(vcvtd_f64_s64, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
- NEONMAP1(vcvtd_f64_u64, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
- NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
- NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
- NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
- NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
- NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
- NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
- NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
- NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
- NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
- NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
- NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
- NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
- NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
- NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
- NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
- NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
- NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
- NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
- NEONMAP1(vcvts_f32_s32, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
- NEONMAP1(vcvts_f32_u32, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
- NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
- NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
- NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
- NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
- NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
- NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
- NEONMAP1(vcvtxd_f32_f64, aarch64_neon_fcvtxn, 0),
- NEONMAP0(vdupb_lane_i8),
- NEONMAP0(vdupb_laneq_i8),
- NEONMAP0(vdupd_lane_f64),
- NEONMAP0(vdupd_lane_i64),
- NEONMAP0(vdupd_laneq_f64),
- NEONMAP0(vdupd_laneq_i64),
- NEONMAP0(vduph_lane_i16),
- NEONMAP0(vduph_laneq_i16),
- NEONMAP0(vdups_lane_f32),
- NEONMAP0(vdups_lane_i32),
- NEONMAP0(vdups_laneq_f32),
- NEONMAP0(vdups_laneq_i32),
- NEONMAP0(vfmad_lane_f64),
- NEONMAP0(vfmad_laneq_f64),
- NEONMAP0(vfmas_lane_f32),
- NEONMAP0(vfmas_laneq_f32),
- NEONMAP0(vget_lane_f32),
- NEONMAP0(vget_lane_f64),
- NEONMAP0(vget_lane_i16),
- NEONMAP0(vget_lane_i32),
- NEONMAP0(vget_lane_i64),
- NEONMAP0(vget_lane_i8),
- NEONMAP0(vgetq_lane_f32),
- NEONMAP0(vgetq_lane_f64),
- NEONMAP0(vgetq_lane_i16),
- NEONMAP0(vgetq_lane_i32),
- NEONMAP0(vgetq_lane_i64),
- NEONMAP0(vgetq_lane_i8),
- NEONMAP1(vmaxnmv_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
- NEONMAP1(vmaxnmvq_f32, aarch64_neon_vmaxnmv, 0),
- NEONMAP1(vmaxnmvq_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
- NEONMAP1(vmaxv_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
- NEONMAP1(vmaxv_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
- NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
- NEONMAP1(vmaxv_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
- NEONMAP1(vmaxv_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
- NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
- NEONMAP1(vmaxv_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
- NEONMAP1(vmaxvq_f32, aarch64_neon_vmaxv, 0),
- NEONMAP1(vmaxvq_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
- NEONMAP1(vmaxvq_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
- NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
- NEONMAP1(vmaxvq_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
- NEONMAP1(vmaxvq_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
- NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
- NEONMAP1(vmaxvq_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
- NEONMAP1(vminnmv_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
- NEONMAP1(vminnmvq_f32, aarch64_neon_vminnmv, 0),
- NEONMAP1(vminnmvq_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
- NEONMAP1(vminv_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
- NEONMAP1(vminv_s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
- NEONMAP1(vminv_s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
- NEONMAP1(vminv_s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
- NEONMAP1(vminv_u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
- NEONMAP1(vminv_u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
- NEONMAP1(vminv_u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
- NEONMAP1(vminvq_f32, aarch64_neon_vminv, 0),
- NEONMAP1(vminvq_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
- NEONMAP1(vminvq_s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
- NEONMAP1(vminvq_s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
- NEONMAP1(vminvq_s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
- NEONMAP1(vminvq_u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
- NEONMAP1(vminvq_u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
- NEONMAP1(vminvq_u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
- NEONMAP0(vmul_n_f64),
- NEONMAP1(vmull_p64, aarch64_neon_vmull_p64, 0),
- NEONMAP0(vmulxd_f64),
- NEONMAP0(vmulxs_f32),
- NEONMAP1(vnegd_s64, aarch64_neon_vneg, 0),
- NEONMAP1(vpaddd_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
- NEONMAP1(vpaddd_s64, aarch64_neon_vpadd, 0),
- NEONMAP1(vpaddd_u64, aarch64_neon_vpadd, 0),
- NEONMAP1(vpadds_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
- NEONMAP1(vpmaxnmqd_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
- NEONMAP1(vpmaxnms_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
- NEONMAP1(vpmaxqd_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
- NEONMAP1(vpmaxs_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
- NEONMAP1(vpminnmqd_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
- NEONMAP1(vpminnms_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
- NEONMAP1(vpminqd_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
- NEONMAP1(vpmins_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
- NEONMAP1(vqabsb_s8, arm_neon_vqabs, VectorRet),
- NEONMAP1(vqabsd_s64, arm_neon_vqabs, VectorRet),
- NEONMAP1(vqabsh_s16, arm_neon_vqabs, VectorRet),
- NEONMAP1(vqabss_s32, arm_neon_vqabs, VectorRet),
- NEONMAP1(vqaddb_s8, arm_neon_vqadds, VectorRet),
- NEONMAP1(vqaddb_u8, arm_neon_vqaddu, VectorRet),
- NEONMAP1(vqaddd_s64, arm_neon_vqadds, VectorRet),
- NEONMAP1(vqaddd_u64, arm_neon_vqaddu, VectorRet),
- NEONMAP1(vqaddh_s16, arm_neon_vqadds, VectorRet),
- NEONMAP1(vqaddh_u16, arm_neon_vqaddu, VectorRet),
- NEONMAP1(vqadds_s32, arm_neon_vqadds, VectorRet),
- NEONMAP1(vqadds_u32, arm_neon_vqaddu, VectorRet),
- NEONMAP0(vqdmlalh_lane_s16),
- NEONMAP0(vqdmlalh_laneq_s16),
- NEONMAP1(vqdmlalh_s16, aarch64_neon_vqdmlal, VectorRet),
- NEONMAP0(vqdmlals_lane_s32),
- NEONMAP0(vqdmlals_laneq_s32),
- NEONMAP1(vqdmlals_s32, aarch64_neon_vqdmlal, VectorRet),
- NEONMAP0(vqdmlslh_lane_s16),
- NEONMAP0(vqdmlslh_laneq_s16),
- NEONMAP1(vqdmlslh_s16, aarch64_neon_vqdmlsl, VectorRet),
- NEONMAP0(vqdmlsls_lane_s32),
- NEONMAP0(vqdmlsls_laneq_s32),
- NEONMAP1(vqdmlsls_s32, aarch64_neon_vqdmlsl, VectorRet),
- NEONMAP1(vqdmulhh_s16, arm_neon_vqdmulh, VectorRet),
- NEONMAP1(vqdmulhs_s32, arm_neon_vqdmulh, VectorRet),
- NEONMAP1(vqdmullh_s16, arm_neon_vqdmull, VectorRet),
- NEONMAP1(vqdmulls_s32, arm_neon_vqdmull, VectorRet),
- NEONMAP1(vqmovnd_s64, arm_neon_vqmovns, VectorRet),
- NEONMAP1(vqmovnd_u64, arm_neon_vqmovnu, VectorRet),
- NEONMAP1(vqmovnh_s16, arm_neon_vqmovns, VectorRet),
- NEONMAP1(vqmovnh_u16, arm_neon_vqmovnu, VectorRet),
- NEONMAP1(vqmovns_s32, arm_neon_vqmovns, VectorRet),
- NEONMAP1(vqmovns_u32, arm_neon_vqmovnu, VectorRet),
- NEONMAP1(vqmovund_s64, arm_neon_vqmovnsu, VectorRet),
- NEONMAP1(vqmovunh_s16, arm_neon_vqmovnsu, VectorRet),
- NEONMAP1(vqmovuns_s32, arm_neon_vqmovnsu, VectorRet),
- NEONMAP1(vqnegb_s8, arm_neon_vqneg, VectorRet),
- NEONMAP1(vqnegd_s64, arm_neon_vqneg, VectorRet),
- NEONMAP1(vqnegh_s16, arm_neon_vqneg, VectorRet),
- NEONMAP1(vqnegs_s32, arm_neon_vqneg, VectorRet),
- NEONMAP1(vqrdmulhh_s16, arm_neon_vqrdmulh, VectorRet),
- NEONMAP1(vqrdmulhs_s32, arm_neon_vqrdmulh, VectorRet),
- NEONMAP1(vqrshlb_s8, aarch64_neon_vqrshls, VectorRet),
- NEONMAP1(vqrshlb_u8, aarch64_neon_vqrshlu, VectorRet),
- NEONMAP1(vqrshld_s64, aarch64_neon_vqrshls, VectorRet),
- NEONMAP1(vqrshld_u64, aarch64_neon_vqrshlu, VectorRet),
- NEONMAP1(vqrshlh_s16, aarch64_neon_vqrshls, VectorRet),
- NEONMAP1(vqrshlh_u16, aarch64_neon_vqrshlu, VectorRet),
- NEONMAP1(vqrshls_s32, aarch64_neon_vqrshls, VectorRet),
- NEONMAP1(vqrshls_u32, aarch64_neon_vqrshlu, VectorRet),
- NEONMAP1(vqrshrnd_n_s64, aarch64_neon_vsqrshrn, VectorRet),
- NEONMAP1(vqrshrnd_n_u64, aarch64_neon_vuqrshrn, VectorRet),
- NEONMAP1(vqrshrnh_n_s16, aarch64_neon_vsqrshrn, VectorRet),
- NEONMAP1(vqrshrnh_n_u16, aarch64_neon_vuqrshrn, VectorRet),
- NEONMAP1(vqrshrns_n_s32, aarch64_neon_vsqrshrn, VectorRet),
- NEONMAP1(vqrshrns_n_u32, aarch64_neon_vuqrshrn, VectorRet),
- NEONMAP1(vqrshrund_n_s64, aarch64_neon_vsqrshrun, VectorRet),
- NEONMAP1(vqrshrunh_n_s16, aarch64_neon_vsqrshrun, VectorRet),
- NEONMAP1(vqrshruns_n_s32, aarch64_neon_vsqrshrun, VectorRet),
- NEONMAP1(vqshlb_n_s8, aarch64_neon_vqshls_n, VectorRet),
- NEONMAP1(vqshlb_n_u8, aarch64_neon_vqshlu_n, VectorRet),
- NEONMAP1(vqshlb_s8, aarch64_neon_vqshls, VectorRet),
- NEONMAP1(vqshlb_u8, aarch64_neon_vqshlu, VectorRet),
- NEONMAP1(vqshld_n_s64, aarch64_neon_vqshls_n, VectorRet),
- NEONMAP1(vqshld_n_u64, aarch64_neon_vqshlu_n, VectorRet),
- NEONMAP1(vqshld_s64, aarch64_neon_vqshls, VectorRet),
- NEONMAP1(vqshld_u64, aarch64_neon_vqshlu, VectorRet),
- NEONMAP1(vqshlh_n_s16, aarch64_neon_vqshls_n, VectorRet),
- NEONMAP1(vqshlh_n_u16, aarch64_neon_vqshlu_n, VectorRet),
- NEONMAP1(vqshlh_s16, aarch64_neon_vqshls, VectorRet),
- NEONMAP1(vqshlh_u16, aarch64_neon_vqshlu, VectorRet),
- NEONMAP1(vqshls_n_s32, aarch64_neon_vqshls_n, VectorRet),
- NEONMAP1(vqshls_n_u32, aarch64_neon_vqshlu_n, VectorRet),
- NEONMAP1(vqshls_s32, aarch64_neon_vqshls, VectorRet),
- NEONMAP1(vqshls_u32, aarch64_neon_vqshlu, VectorRet),
- NEONMAP1(vqshlub_n_s8, aarch64_neon_vsqshlu, VectorRet),
- NEONMAP1(vqshlud_n_s64, aarch64_neon_vsqshlu, VectorRet),
- NEONMAP1(vqshluh_n_s16, aarch64_neon_vsqshlu, VectorRet),
- NEONMAP1(vqshlus_n_s32, aarch64_neon_vsqshlu, VectorRet),
- NEONMAP1(vqshrnd_n_s64, aarch64_neon_vsqshrn, VectorRet),
- NEONMAP1(vqshrnd_n_u64, aarch64_neon_vuqshrn, VectorRet),
- NEONMAP1(vqshrnh_n_s16, aarch64_neon_vsqshrn, VectorRet),
- NEONMAP1(vqshrnh_n_u16, aarch64_neon_vuqshrn, VectorRet),
- NEONMAP1(vqshrns_n_s32, aarch64_neon_vsqshrn, VectorRet),
- NEONMAP1(vqshrns_n_u32, aarch64_neon_vuqshrn, VectorRet),
- NEONMAP1(vqshrund_n_s64, aarch64_neon_vsqshrun, VectorRet),
- NEONMAP1(vqshrunh_n_s16, aarch64_neon_vsqshrun, VectorRet),
- NEONMAP1(vqshruns_n_s32, aarch64_neon_vsqshrun, VectorRet),
- NEONMAP1(vqsubb_s8, arm_neon_vqsubs, VectorRet),
- NEONMAP1(vqsubb_u8, arm_neon_vqsubu, VectorRet),
- NEONMAP1(vqsubd_s64, arm_neon_vqsubs, VectorRet),
- NEONMAP1(vqsubd_u64, arm_neon_vqsubu, VectorRet),
- NEONMAP1(vqsubh_s16, arm_neon_vqsubs, VectorRet),
- NEONMAP1(vqsubh_u16, arm_neon_vqsubu, VectorRet),
- NEONMAP1(vqsubs_s32, arm_neon_vqsubs, VectorRet),
- NEONMAP1(vqsubs_u32, arm_neon_vqsubu, VectorRet),
- NEONMAP1(vrecped_f64, aarch64_neon_vrecpe, AddRetType),
- NEONMAP1(vrecpes_f32, aarch64_neon_vrecpe, AddRetType),
- NEONMAP1(vrecpsd_f64, aarch64_neon_vrecps, AddRetType),
- NEONMAP1(vrecpss_f32, aarch64_neon_vrecps, AddRetType),
- NEONMAP1(vrecpxd_f64, aarch64_neon_vrecpx, AddRetType),
- NEONMAP1(vrecpxs_f32, aarch64_neon_vrecpx, AddRetType),
- NEONMAP1(vrshld_s64, aarch64_neon_vrshlds, 0),
- NEONMAP1(vrshld_u64, aarch64_neon_vrshldu, 0),
- NEONMAP1(vrshrd_n_s64, aarch64_neon_vsrshr, VectorRet),
- NEONMAP1(vrshrd_n_u64, aarch64_neon_vurshr, VectorRet),
- NEONMAP1(vrsqrted_f64, aarch64_neon_vrsqrte, AddRetType),
- NEONMAP1(vrsqrtes_f32, aarch64_neon_vrsqrte, AddRetType),
- NEONMAP1(vrsqrtsd_f64, aarch64_neon_vrsqrts, AddRetType),
- NEONMAP1(vrsqrtss_f32, aarch64_neon_vrsqrts, AddRetType),
- NEONMAP1(vrsrad_n_s64, aarch64_neon_vrsrads_n, 0),
- NEONMAP1(vrsrad_n_u64, aarch64_neon_vrsradu_n, 0),
- NEONMAP0(vset_lane_f32),
- NEONMAP0(vset_lane_f64),
- NEONMAP0(vset_lane_i16),
- NEONMAP0(vset_lane_i32),
- NEONMAP0(vset_lane_i64),
- NEONMAP0(vset_lane_i8),
- NEONMAP0(vsetq_lane_f32),
- NEONMAP0(vsetq_lane_f64),
- NEONMAP0(vsetq_lane_i16),
- NEONMAP0(vsetq_lane_i32),
- NEONMAP0(vsetq_lane_i64),
- NEONMAP0(vsetq_lane_i8),
- NEONMAP1(vsha1cq_u32, arm_neon_sha1c, 0),
- NEONMAP1(vsha1h_u32, arm_neon_sha1h, 0),
- NEONMAP1(vsha1mq_u32, arm_neon_sha1m, 0),
- NEONMAP1(vsha1pq_u32, arm_neon_sha1p, 0),
- NEONMAP1(vshld_n_s64, aarch64_neon_vshld_n, 0),
- NEONMAP1(vshld_n_u64, aarch64_neon_vshld_n, 0),
- NEONMAP1(vshld_s64, aarch64_neon_vshlds, 0),
- NEONMAP1(vshld_u64, aarch64_neon_vshldu, 0),
- NEONMAP1(vshrd_n_s64, aarch64_neon_vshrds_n, 0),
- NEONMAP1(vshrd_n_u64, aarch64_neon_vshrdu_n, 0),
- NEONMAP1(vslid_n_s64, aarch64_neon_vsli, VectorRet),
- NEONMAP1(vslid_n_u64, aarch64_neon_vsli, VectorRet),
- NEONMAP1(vsqaddb_u8, aarch64_neon_vsqadd, VectorRet),
- NEONMAP1(vsqaddd_u64, aarch64_neon_vsqadd, VectorRet),
- NEONMAP1(vsqaddh_u16, aarch64_neon_vsqadd, VectorRet),
- NEONMAP1(vsqadds_u32, aarch64_neon_vsqadd, VectorRet),
- NEONMAP1(vsrad_n_s64, aarch64_neon_vsrads_n, 0),
- NEONMAP1(vsrad_n_u64, aarch64_neon_vsradu_n, 0),
- NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, VectorRet),
- NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, VectorRet),
- NEONMAP1(vsubd_s64, aarch64_neon_vsubds, 0),
- NEONMAP1(vsubd_u64, aarch64_neon_vsubdu, 0),
- NEONMAP1(vtstd_s64, aarch64_neon_vtstd, VectorRetGetArgs01),
- NEONMAP1(vtstd_u64, aarch64_neon_vtstd, VectorRetGetArgs01),
- NEONMAP1(vuqaddb_s8, aarch64_neon_vuqadd, VectorRet),
- NEONMAP1(vuqaddd_s64, aarch64_neon_vuqadd, VectorRet),
- NEONMAP1(vuqaddh_s16, aarch64_neon_vuqadd, VectorRet),
- NEONMAP1(vuqadds_s32, aarch64_neon_vuqadd, VectorRet)
-};
-
static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
@@ -2739,7 +2390,6 @@ static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = {
#undef NEONMAP2
static bool NEONSIMDIntrinsicsProvenSorted = false;
-static bool AArch64SISDIntrinsicInfoProvenSorted = false;
static bool ARM64SIMDIntrinsicsProvenSorted = false;
static bool ARM64SISDIntrinsicsProvenSorted = false;
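
The *ProvenSorted flags deleted and kept here guard a lazily verified binary search over the intrinsic tables: sortedness is asserted once per table in asserts builds, after which every lookup is an ordinary lower_bound. A reduced sketch of the pattern (simplified from findNeonIntrinsicInMap; not the exact clang signature):

    #include <algorithm>
    #include <cassert>

    struct Info { unsigned BuiltinID; /* intrinsic, modifiers, ... */ };
    bool operator<(const Info &L, const Info &R) {
      return L.BuiltinID < R.BuiltinID;
    }

    const Info *findInMap(const Info *Begin, const Info *End, unsigned ID,
                          bool &ProvenSorted) {
      if (!ProvenSorted) {  // checked once; compiled out under NDEBUG
        assert(std::is_sorted(Begin, End) && "intrinsic table not sorted");
        ProvenSorted = true;
      }
      Info Key = {ID};
      const Info *It = std::lower_bound(Begin, End, Key);
      return (It != End && It->BuiltinID == ID) ? It : nullptr;
    }
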
@@ -2869,169 +2519,6 @@ static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
return CGF.Builder.CreateBitCast(Result, ResultType, s);
}
-static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
- const NeonIntrinsicInfo &SISDInfo,
- const CallExpr *E) {
- unsigned BuiltinID = SISDInfo.BuiltinID;
- unsigned int Int = SISDInfo.LLVMIntrinsic;
- const char *s = SISDInfo.NameHint;
-
- SmallVector<Value *, 4> Ops;
- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
- Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
- }
-
- // AArch64 scalar builtins are not overloaded, they do not have an extra
- // argument that specifies the vector type, need to handle each case.
- switch (BuiltinID) {
- default: break;
- case NEON::BI__builtin_neon_vdups_lane_f32:
- case NEON::BI__builtin_neon_vdupd_lane_f64:
- case NEON::BI__builtin_neon_vdups_laneq_f32:
- case NEON::BI__builtin_neon_vdupd_laneq_f64: {
- return CGF.Builder.CreateExtractElement(Ops[0], Ops[1], "vdup_lane");
- }
- case NEON::BI__builtin_neon_vdupb_lane_i8:
- case NEON::BI__builtin_neon_vduph_lane_i16:
- case NEON::BI__builtin_neon_vdups_lane_i32:
- case NEON::BI__builtin_neon_vdupd_lane_i64:
- case NEON::BI__builtin_neon_vdupb_laneq_i8:
- case NEON::BI__builtin_neon_vduph_laneq_i16:
- case NEON::BI__builtin_neon_vdups_laneq_i32:
- case NEON::BI__builtin_neon_vdupd_laneq_i64: {
- // The backend treats Neon scalar types as v1ix types
- // So we want to dup lane from any vector to v1ix vector
- // with shufflevector
- s = "vdup_lane";
- Value* SV = llvm::ConstantVector::getSplat(1, cast<ConstantInt>(Ops[1]));
- Value *Result = CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], SV, s);
- llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
- // AArch64 intrinsic one-element vector type cast to
- // scalar type expected by the builtin
- return CGF.Builder.CreateBitCast(Result, Ty, s);
- }
- case NEON::BI__builtin_neon_vqdmlalh_lane_s16 :
- case NEON::BI__builtin_neon_vqdmlalh_laneq_s16 :
- case NEON::BI__builtin_neon_vqdmlals_lane_s32 :
- case NEON::BI__builtin_neon_vqdmlals_laneq_s32 :
- case NEON::BI__builtin_neon_vqdmlslh_lane_s16 :
- case NEON::BI__builtin_neon_vqdmlslh_laneq_s16 :
- case NEON::BI__builtin_neon_vqdmlsls_lane_s32 :
- case NEON::BI__builtin_neon_vqdmlsls_laneq_s32 : {
- Int = Intrinsic::arm_neon_vqadds;
- if (BuiltinID == NEON::BI__builtin_neon_vqdmlslh_lane_s16 ||
- BuiltinID == NEON::BI__builtin_neon_vqdmlslh_laneq_s16 ||
- BuiltinID == NEON::BI__builtin_neon_vqdmlsls_lane_s32 ||
- BuiltinID == NEON::BI__builtin_neon_vqdmlsls_laneq_s32) {
- Int = Intrinsic::arm_neon_vqsubs;
- }
- // create vqdmull call with b * c[i]
- llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType());
- llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1);
- Ty = CGF.ConvertType(E->getArg(0)->getType());
- llvm::VectorType *ResVTy = llvm::VectorType::get(Ty, 1);
- Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy);
- Value *V = UndefValue::get(OpVTy);
- llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0);
- SmallVector<Value *, 2> MulOps;
- MulOps.push_back(Ops[1]);
- MulOps.push_back(Ops[2]);
- MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI);
- MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract");
- MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI);
- Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]);
- // create vqadds call with a +/- vqdmull result
- F = CGF.CGM.getIntrinsic(Int, ResVTy);
- SmallVector<Value *, 2> AddOps;
- AddOps.push_back(Ops[0]);
- AddOps.push_back(MulRes);
- V = UndefValue::get(ResVTy);
- AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI);
- Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]);
- return CGF.Builder.CreateBitCast(AddRes, Ty);
- }
- case NEON::BI__builtin_neon_vfmas_lane_f32:
- case NEON::BI__builtin_neon_vfmas_laneq_f32:
- case NEON::BI__builtin_neon_vfmad_lane_f64:
- case NEON::BI__builtin_neon_vfmad_laneq_f64: {
- llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
- Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
- Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
- return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
- }
- // Scalar Floating-point Multiply Extended
- case NEON::BI__builtin_neon_vmulxs_f32:
- case NEON::BI__builtin_neon_vmulxd_f64: {
- Int = Intrinsic::aarch64_neon_vmulx;
- llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
- return CGF.EmitNeonCall(CGF.CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
- }
- case NEON::BI__builtin_neon_vmul_n_f64: {
- // v1f64 vmul_n_f64 should be mapped to Neon scalar mul lane
- llvm::Type *VTy = GetNeonType(&CGF,
- NeonTypeFlags(NeonTypeFlags::Float64, false, false));
- Ops[0] = CGF.Builder.CreateBitCast(Ops[0], VTy);
- llvm::Value *Idx = llvm::ConstantInt::get(CGF.Int32Ty, 0);
- Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], Idx, "extract");
- Value *Result = CGF.Builder.CreateFMul(Ops[0], Ops[1]);
- return CGF.Builder.CreateBitCast(Result, VTy);
- }
- case NEON::BI__builtin_neon_vget_lane_i8:
- case NEON::BI__builtin_neon_vget_lane_i16:
- case NEON::BI__builtin_neon_vget_lane_i32:
- case NEON::BI__builtin_neon_vget_lane_i64:
- case NEON::BI__builtin_neon_vget_lane_f32:
- case NEON::BI__builtin_neon_vget_lane_f64:
- case NEON::BI__builtin_neon_vgetq_lane_i8:
- case NEON::BI__builtin_neon_vgetq_lane_i16:
- case NEON::BI__builtin_neon_vgetq_lane_i32:
- case NEON::BI__builtin_neon_vgetq_lane_i64:
- case NEON::BI__builtin_neon_vgetq_lane_f32:
- case NEON::BI__builtin_neon_vgetq_lane_f64:
- return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vget_lane_i8, E);
- case NEON::BI__builtin_neon_vset_lane_i8:
- case NEON::BI__builtin_neon_vset_lane_i16:
- case NEON::BI__builtin_neon_vset_lane_i32:
- case NEON::BI__builtin_neon_vset_lane_i64:
- case NEON::BI__builtin_neon_vset_lane_f32:
- case NEON::BI__builtin_neon_vset_lane_f64:
- case NEON::BI__builtin_neon_vsetq_lane_i8:
- case NEON::BI__builtin_neon_vsetq_lane_i16:
- case NEON::BI__builtin_neon_vsetq_lane_i32:
- case NEON::BI__builtin_neon_vsetq_lane_i64:
- case NEON::BI__builtin_neon_vsetq_lane_f32:
- case NEON::BI__builtin_neon_vsetq_lane_f64:
- return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vset_lane_i8, E);
-
- case NEON::BI__builtin_neon_vceqzd_s64:
- case NEON::BI__builtin_neon_vceqzd_u64:
- case NEON::BI__builtin_neon_vcgezd_s64:
- case NEON::BI__builtin_neon_vcgtzd_s64:
- case NEON::BI__builtin_neon_vclezd_s64:
- case NEON::BI__builtin_neon_vcltzd_s64:
- // Add implicit zero operand.
- Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
- break;
- case NEON::BI__builtin_neon_vceqzs_f32:
- case NEON::BI__builtin_neon_vceqzd_f64:
- case NEON::BI__builtin_neon_vcgezs_f32:
- case NEON::BI__builtin_neon_vcgezd_f64:
- case NEON::BI__builtin_neon_vcgtzs_f32:
- case NEON::BI__builtin_neon_vcgtzd_f64:
- case NEON::BI__builtin_neon_vclezs_f32:
- case NEON::BI__builtin_neon_vclezd_f64:
- case NEON::BI__builtin_neon_vcltzs_f32:
- case NEON::BI__builtin_neon_vcltzd_f64:
- // Add implicit zero operand.
- Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
- break;
- }
-
- // It didn't need any handling specific to the AArch64 backend, so defer to
- // common code.
- return EmitCommonNeonSISDBuiltinExpr(CGF, SISDInfo, Ops, E);
-}
-
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
const char *NameHint, unsigned Modifier, const CallExpr *E,
@@ -3534,796 +3021,6 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
return CGF.EmitNeonCall(TblF, TblOps, Name);
}
-static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF,
- unsigned BuiltinID,
- const CallExpr *E) {
- unsigned int Int = 0;
- const char *s = nullptr;
-
- switch (BuiltinID) {
- default:
- return nullptr;
- case NEON::BI__builtin_neon_vtbl1_v:
- case NEON::BI__builtin_neon_vqtbl1_v:
- case NEON::BI__builtin_neon_vqtbl1q_v:
- case NEON::BI__builtin_neon_vtbl2_v:
- case NEON::BI__builtin_neon_vqtbl2_v:
- case NEON::BI__builtin_neon_vqtbl2q_v:
- case NEON::BI__builtin_neon_vtbl3_v:
- case NEON::BI__builtin_neon_vqtbl3_v:
- case NEON::BI__builtin_neon_vqtbl3q_v:
- case NEON::BI__builtin_neon_vtbl4_v:
- case NEON::BI__builtin_neon_vqtbl4_v:
- case NEON::BI__builtin_neon_vqtbl4q_v:
- case NEON::BI__builtin_neon_vtbx1_v:
- case NEON::BI__builtin_neon_vqtbx1_v:
- case NEON::BI__builtin_neon_vqtbx1q_v:
- case NEON::BI__builtin_neon_vtbx2_v:
- case NEON::BI__builtin_neon_vqtbx2_v:
- case NEON::BI__builtin_neon_vqtbx2q_v:
- case NEON::BI__builtin_neon_vtbx3_v:
- case NEON::BI__builtin_neon_vqtbx3_v:
- case NEON::BI__builtin_neon_vqtbx3q_v:
- case NEON::BI__builtin_neon_vtbx4_v:
- case NEON::BI__builtin_neon_vqtbx4_v:
- case NEON::BI__builtin_neon_vqtbx4q_v:
- break;
- }
-
- assert(E->getNumArgs() >= 3);
-
- // Get the last argument, which specifies the vector type.
- llvm::APSInt Result;
- const Expr *Arg = E->getArg(E->getNumArgs() - 1);
- if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
- return nullptr;
-
- // Determine the type of this overloaded NEON intrinsic.
- NeonTypeFlags Type(Result.getZExtValue());
- llvm::VectorType *VTy = GetNeonType(&CGF, Type);
- llvm::Type *Ty = VTy;
- if (!Ty)
- return nullptr;
-
- SmallVector<Value *, 4> Ops;
- for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
- Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
- }
-
- unsigned nElts = VTy->getNumElements();
-
- // AArch64 scalar builtins are not overloaded, they do not have an extra
- // argument that specifies the vector type, need to handle each case.
- SmallVector<Value *, 2> TblOps;
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vtbl1_v: {
- TblOps.push_back(Ops[0]);
- return packTBLDVectorList(CGF, TblOps, nullptr, Ops[1], Ty,
- Intrinsic::aarch64_neon_vtbl1, "vtbl1");
- }
- case NEON::BI__builtin_neon_vtbl2_v: {
- TblOps.push_back(Ops[0]);
- TblOps.push_back(Ops[1]);
- return packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty,
- Intrinsic::aarch64_neon_vtbl1, "vtbl1");
- }
- case NEON::BI__builtin_neon_vtbl3_v: {
- TblOps.push_back(Ops[0]);
- TblOps.push_back(Ops[1]);
- TblOps.push_back(Ops[2]);
- return packTBLDVectorList(CGF, TblOps, nullptr, Ops[3], Ty,
- Intrinsic::aarch64_neon_vtbl2, "vtbl2");
- }
- case NEON::BI__builtin_neon_vtbl4_v: {
- TblOps.push_back(Ops[0]);
- TblOps.push_back(Ops[1]);
- TblOps.push_back(Ops[2]);
- TblOps.push_back(Ops[3]);
- return packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty,
- Intrinsic::aarch64_neon_vtbl2, "vtbl2");
- }
- case NEON::BI__builtin_neon_vtbx1_v: {
- TblOps.push_back(Ops[1]);
- Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty,
- Intrinsic::aarch64_neon_vtbl1, "vtbl1");
-
- llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8);
- Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight);
- Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
- CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty);
-
- SmallVector<Value *, 4> BslOps;
- BslOps.push_back(CmpRes);
- BslOps.push_back(Ops[0]);
- BslOps.push_back(TblRes);
- Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
- return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
- }
- case NEON::BI__builtin_neon_vtbx2_v: {
- TblOps.push_back(Ops[1]);
- TblOps.push_back(Ops[2]);
- return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty,
- Intrinsic::aarch64_neon_vtbx1, "vtbx1");
- }
- case NEON::BI__builtin_neon_vtbx3_v: {
- TblOps.push_back(Ops[1]);
- TblOps.push_back(Ops[2]);
- TblOps.push_back(Ops[3]);
- Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty,
- Intrinsic::aarch64_neon_vtbl2, "vtbl2");
-
- llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24);
- Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour);
- Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
- TwentyFourV);
- CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty);
-
- SmallVector<Value *, 4> BslOps;
- BslOps.push_back(CmpRes);
- BslOps.push_back(Ops[0]);
- BslOps.push_back(TblRes);
- Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
- return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
- }
- case NEON::BI__builtin_neon_vtbx4_v: {
- TblOps.push_back(Ops[1]);
- TblOps.push_back(Ops[2]);
- TblOps.push_back(Ops[3]);
- TblOps.push_back(Ops[4]);
- return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty,
- Intrinsic::aarch64_neon_vtbx2, "vtbx2");
- }
- case NEON::BI__builtin_neon_vqtbl1_v:
- case NEON::BI__builtin_neon_vqtbl1q_v:
- Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break;
- case NEON::BI__builtin_neon_vqtbl2_v:
- case NEON::BI__builtin_neon_vqtbl2q_v: {
- Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break;
- case NEON::BI__builtin_neon_vqtbl3_v:
- case NEON::BI__builtin_neon_vqtbl3q_v:
- Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break;
- case NEON::BI__builtin_neon_vqtbl4_v:
- case NEON::BI__builtin_neon_vqtbl4q_v:
- Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break;
- case NEON::BI__builtin_neon_vqtbx1_v:
- case NEON::BI__builtin_neon_vqtbx1q_v:
- Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; break;
- case NEON::BI__builtin_neon_vqtbx2_v:
- case NEON::BI__builtin_neon_vqtbx2q_v:
- Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break;
- case NEON::BI__builtin_neon_vqtbx3_v:
- case NEON::BI__builtin_neon_vqtbx3q_v:
- Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break;
- case NEON::BI__builtin_neon_vqtbx4_v:
- case NEON::BI__builtin_neon_vqtbx4q_v:
- Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break;
- }
- }
-
- if (!Int)
- return nullptr;
-
- Function *F = CGF.CGM.getIntrinsic(Int, Ty);
- return CGF.EmitNeonCall(F, Ops, s);
-}
-
-Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
-
- // Process AArch64 scalar builtins
- llvm::ArrayRef<NeonIntrinsicInfo> SISDInfo(AArch64SISDIntrinsicInfo);
- const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
- SISDInfo, BuiltinID, AArch64SISDIntrinsicInfoProvenSorted);
-
- if (Builtin) {
- Value *Result = EmitAArch64ScalarBuiltinExpr(*this, *Builtin, E);
- assert(Result && "SISD intrinsic should have been handled");
- return Result;
- }
-
- // Process AArch64 table lookup builtins
- if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E))
- return Result;
-
- if (BuiltinID == AArch64::BI__clear_cache) {
- assert(E->getNumArgs() == 2 &&
- "Variadic __clear_cache slipped through on AArch64");
-
- const FunctionDecl *FD = E->getDirectCallee();
- SmallVector<Value *, 2> Ops;
- for (unsigned i = 0; i < E->getNumArgs(); i++)
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
- llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
- StringRef Name = FD->getName();
- return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
- }
-
- SmallVector<Value *, 4> Ops;
- llvm::Value *Align = nullptr; // Alignment for load/store
-
- if (BuiltinID == NEON::BI__builtin_neon_vldrq_p128) {
- Value *Op = EmitScalarExpr(E->getArg(0));
- unsigned addressSpace =
- cast<llvm::PointerType>(Op->getType())->getAddressSpace();
- llvm::Type *Ty = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace);
- Op = Builder.CreateBitCast(Op, Ty);
- Op = Builder.CreateLoad(Op);
- Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
- return Builder.CreateBitCast(Op, Ty);
- }
- if (BuiltinID == NEON::BI__builtin_neon_vstrq_p128) {
- Value *Op0 = EmitScalarExpr(E->getArg(0));
- unsigned addressSpace =
- cast<llvm::PointerType>(Op0->getType())->getAddressSpace();
- llvm::Type *PTy = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace);
- Op0 = Builder.CreateBitCast(Op0, PTy);
- Value *Op1 = EmitScalarExpr(E->getArg(1));
- llvm::Type *Ty = llvm::Type::getFP128Ty(getLLVMContext());
- Op1 = Builder.CreateBitCast(Op1, Ty);
- return Builder.CreateStore(Op1, Op0);
- }
- for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
- if (i == 0) {
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld1_v:
- case NEON::BI__builtin_neon_vld1q_v:
- case NEON::BI__builtin_neon_vst1_v:
- case NEON::BI__builtin_neon_vst1q_v:
- case NEON::BI__builtin_neon_vst2_v:
- case NEON::BI__builtin_neon_vst2q_v:
- case NEON::BI__builtin_neon_vst3_v:
- case NEON::BI__builtin_neon_vst3q_v:
- case NEON::BI__builtin_neon_vst4_v:
- case NEON::BI__builtin_neon_vst4q_v:
- case NEON::BI__builtin_neon_vst1_x2_v:
- case NEON::BI__builtin_neon_vst1q_x2_v:
- case NEON::BI__builtin_neon_vst1_x3_v:
- case NEON::BI__builtin_neon_vst1q_x3_v:
- case NEON::BI__builtin_neon_vst1_x4_v:
- case NEON::BI__builtin_neon_vst1q_x4_v:
- // Handle ld1/st1 lane in this function a little different from ARM.
- case NEON::BI__builtin_neon_vld1_lane_v:
- case NEON::BI__builtin_neon_vld1q_lane_v:
- case NEON::BI__builtin_neon_vst1_lane_v:
- case NEON::BI__builtin_neon_vst1q_lane_v:
- case NEON::BI__builtin_neon_vst2_lane_v:
- case NEON::BI__builtin_neon_vst2q_lane_v:
- case NEON::BI__builtin_neon_vst3_lane_v:
- case NEON::BI__builtin_neon_vst3q_lane_v:
- case NEON::BI__builtin_neon_vst4_lane_v:
- case NEON::BI__builtin_neon_vst4q_lane_v:
- case NEON::BI__builtin_neon_vld1_dup_v:
- case NEON::BI__builtin_neon_vld1q_dup_v:
- // Get the alignment for the argument in addition to the value;
- // we'll use it later.
- std::pair<llvm::Value *, unsigned> Src =
- EmitPointerWithAlignment(E->getArg(0));
- Ops.push_back(Src.first);
- Align = Builder.getInt32(Src.second);
- continue;
- }
- }
- if (i == 1) {
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld2_v:
- case NEON::BI__builtin_neon_vld2q_v:
- case NEON::BI__builtin_neon_vld3_v:
- case NEON::BI__builtin_neon_vld3q_v:
- case NEON::BI__builtin_neon_vld4_v:
- case NEON::BI__builtin_neon_vld4q_v:
- case NEON::BI__builtin_neon_vld1_x2_v:
- case NEON::BI__builtin_neon_vld1q_x2_v:
- case NEON::BI__builtin_neon_vld1_x3_v:
- case NEON::BI__builtin_neon_vld1q_x3_v:
- case NEON::BI__builtin_neon_vld1_x4_v:
- case NEON::BI__builtin_neon_vld1q_x4_v:
- // Handle ld1/st1 dup lane in this function a little different from ARM.
- case NEON::BI__builtin_neon_vld2_dup_v:
- case NEON::BI__builtin_neon_vld2q_dup_v:
- case NEON::BI__builtin_neon_vld3_dup_v:
- case NEON::BI__builtin_neon_vld3q_dup_v:
- case NEON::BI__builtin_neon_vld4_dup_v:
- case NEON::BI__builtin_neon_vld4q_dup_v:
- case NEON::BI__builtin_neon_vld2_lane_v:
- case NEON::BI__builtin_neon_vld2q_lane_v:
- case NEON::BI__builtin_neon_vld3_lane_v:
- case NEON::BI__builtin_neon_vld3q_lane_v:
- case NEON::BI__builtin_neon_vld4_lane_v:
- case NEON::BI__builtin_neon_vld4q_lane_v:
- // Get the alignment for the argument in addition to the value;
- // we'll use it later.
- std::pair<llvm::Value *, unsigned> Src =
- EmitPointerWithAlignment(E->getArg(1));
- Ops.push_back(Src.first);
- Align = Builder.getInt32(Src.second);
- continue;
- }
- }
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- }
-
- // Get the last argument, which specifies the vector type.
- llvm::APSInt Result;
- const Expr *Arg = E->getArg(E->getNumArgs() - 1);
- if (!Arg->isIntegerConstantExpr(Result, getContext()))
- return nullptr;
-
- // Determine the type of this overloaded NEON intrinsic.
- NeonTypeFlags Type(Result.getZExtValue());
- bool usgn = Type.isUnsigned();
- bool quad = Type.isQuad();
-
- llvm::VectorType *VTy = GetNeonType(this, Type);
- llvm::Type *Ty = VTy;
- if (!Ty)
- return nullptr;
-
- // Many NEON builtins have identical semantics and uses in ARM and
- // AArch64. Emit these in a single function.
- llvm::ArrayRef<NeonIntrinsicInfo> IntrinsicMap(ARMSIMDIntrinsicMap);
- Builtin = findNeonIntrinsicInMap(IntrinsicMap, BuiltinID,
- NEONSIMDIntrinsicsProvenSorted);
- if (Builtin)
- return EmitCommonNeonBuiltinExpr(
- Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
- Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align);
-
- unsigned Int;
- switch (BuiltinID) {
- default:
- return nullptr;
-
- // AArch64 builtins mapping to legacy ARM v7 builtins.
- // FIXME: the mapped builtins listed correspond to what has been tested
- // in aarch64-neon-intrinsics.c so far.
-
- // Shift by immediate
- case NEON::BI__builtin_neon_vrshr_n_v:
- case NEON::BI__builtin_neon_vrshrq_n_v:
- Int = usgn ? Intrinsic::aarch64_neon_vurshr
- : Intrinsic::aarch64_neon_vsrshr;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n");
- case NEON::BI__builtin_neon_vsra_n_v:
- if (VTy->getElementType()->isIntegerTy(64)) {
- Int = usgn ? Intrinsic::aarch64_neon_vsradu_n
- : Intrinsic::aarch64_neon_vsrads_n;
- return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vsra_n");
- }
- return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsra_n_v, E);
- case NEON::BI__builtin_neon_vsraq_n_v:
- return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsraq_n_v, E);
- case NEON::BI__builtin_neon_vrsra_n_v:
- if (VTy->getElementType()->isIntegerTy(64)) {
- Int = usgn ? Intrinsic::aarch64_neon_vrsradu_n
- : Intrinsic::aarch64_neon_vrsrads_n;
- return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vrsra_n");
- }
- // fall through
- case NEON::BI__builtin_neon_vrsraq_n_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Int = usgn ? Intrinsic::aarch64_neon_vurshr
- : Intrinsic::aarch64_neon_vsrshr;
- Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
- return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
- }
- case NEON::BI__builtin_neon_vqshlu_n_v:
- case NEON::BI__builtin_neon_vqshluq_n_v:
- Int = Intrinsic::aarch64_neon_vsqshlu;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n");
- case NEON::BI__builtin_neon_vsri_n_v:
- case NEON::BI__builtin_neon_vsriq_n_v:
- Int = Intrinsic::aarch64_neon_vsri;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n");
- case NEON::BI__builtin_neon_vsli_n_v:
- case NEON::BI__builtin_neon_vsliq_n_v:
- Int = Intrinsic::aarch64_neon_vsli;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n");
- case NEON::BI__builtin_neon_vqshrun_n_v:
- Int = Intrinsic::aarch64_neon_vsqshrun;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
- case NEON::BI__builtin_neon_vrshrn_n_v:
- Int = Intrinsic::aarch64_neon_vrshrn;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
- case NEON::BI__builtin_neon_vqrshrun_n_v:
- Int = Intrinsic::aarch64_neon_vsqrshrun;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
- case NEON::BI__builtin_neon_vqshrn_n_v:
- Int = usgn ? Intrinsic::aarch64_neon_vuqshrn
- : Intrinsic::aarch64_neon_vsqshrn;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
- case NEON::BI__builtin_neon_vqrshrn_n_v:
- Int = usgn ? Intrinsic::aarch64_neon_vuqrshrn
- : Intrinsic::aarch64_neon_vsqrshrn;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
-
- // Convert
- case NEON::BI__builtin_neon_vcvt_n_f64_v:
- case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
- llvm::Type *FloatTy =
- GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
- llvm::Type *Tys[2] = { FloatTy, Ty };
- Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
- : Intrinsic::arm_neon_vcvtfxs2fp;
- Function *F = CGM.getIntrinsic(Int, Tys);
- return EmitNeonCall(F, Ops, "vcvt_n");
- }
-
- // Load/Store
- case NEON::BI__builtin_neon_vld1_x2_v:
- case NEON::BI__builtin_neon_vld1q_x2_v:
- case NEON::BI__builtin_neon_vld1_x3_v:
- case NEON::BI__builtin_neon_vld1q_x3_v:
- case NEON::BI__builtin_neon_vld1_x4_v:
- case NEON::BI__builtin_neon_vld1q_x4_v: {
- unsigned Int;
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld1_x2_v:
- case NEON::BI__builtin_neon_vld1q_x2_v:
- Int = Intrinsic::aarch64_neon_vld1x2;
- break;
- case NEON::BI__builtin_neon_vld1_x3_v:
- case NEON::BI__builtin_neon_vld1q_x3_v:
- Int = Intrinsic::aarch64_neon_vld1x3;
- break;
- case NEON::BI__builtin_neon_vld1_x4_v:
- case NEON::BI__builtin_neon_vld1q_x4_v:
- Int = Intrinsic::aarch64_neon_vld1x4;
- break;
- }
- Function *F = CGM.getIntrinsic(Int, Ty);
- Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld1xN");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateStore(Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vst1_x2_v:
- case NEON::BI__builtin_neon_vst1q_x2_v:
- case NEON::BI__builtin_neon_vst1_x3_v:
- case NEON::BI__builtin_neon_vst1q_x3_v:
- case NEON::BI__builtin_neon_vst1_x4_v:
- case NEON::BI__builtin_neon_vst1q_x4_v: {
- Ops.push_back(Align);
- unsigned Int;
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vst1_x2_v:
- case NEON::BI__builtin_neon_vst1q_x2_v:
- Int = Intrinsic::aarch64_neon_vst1x2;
- break;
- case NEON::BI__builtin_neon_vst1_x3_v:
- case NEON::BI__builtin_neon_vst1q_x3_v:
- Int = Intrinsic::aarch64_neon_vst1x3;
- break;
- case NEON::BI__builtin_neon_vst1_x4_v:
- case NEON::BI__builtin_neon_vst1q_x4_v:
- Int = Intrinsic::aarch64_neon_vst1x4;
- break;
- }
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
- }
- case NEON::BI__builtin_neon_vld1_lane_v:
- case NEON::BI__builtin_neon_vld1q_lane_v: {
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ty = llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- LoadInst *Ld = Builder.CreateLoad(Ops[0]);
- Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
- return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
- }
- case NEON::BI__builtin_neon_vst1_lane_v:
- case NEON::BI__builtin_neon_vst1q_lane_v: {
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- StoreInst *St =
- Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
- St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
- return St;
- }
- case NEON::BI__builtin_neon_vld2_dup_v:
- case NEON::BI__builtin_neon_vld2q_dup_v:
- case NEON::BI__builtin_neon_vld3_dup_v:
- case NEON::BI__builtin_neon_vld3q_dup_v:
- case NEON::BI__builtin_neon_vld4_dup_v:
- case NEON::BI__builtin_neon_vld4q_dup_v: {
- // Handle 64-bit x 1 elements as a special case. There is no "dup" needed.
- if (VTy->getElementType()->getPrimitiveSizeInBits() == 64 &&
- VTy->getNumElements() == 1) {
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld2_dup_v:
- Int = Intrinsic::arm_neon_vld2;
- break;
- case NEON::BI__builtin_neon_vld3_dup_v:
- Int = Intrinsic::arm_neon_vld3;
- break;
- case NEON::BI__builtin_neon_vld4_dup_v:
- Int = Intrinsic::arm_neon_vld4;
- break;
- default:
- llvm_unreachable("unknown vld_dup intrinsic?");
- }
- Function *F = CGM.getIntrinsic(Int, Ty);
- Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateStore(Ops[1], Ops[0]);
- }
- switch (BuiltinID) {
- case NEON::BI__builtin_neon_vld2_dup_v:
- case NEON::BI__builtin_neon_vld2q_dup_v:
- Int = Intrinsic::arm_neon_vld2lane;
- break;
- case NEON::BI__builtin_neon_vld3_dup_v:
- case NEON::BI__builtin_neon_vld3q_dup_v:
- Int = Intrinsic::arm_neon_vld3lane;
- break;
- case NEON::BI__builtin_neon_vld4_dup_v:
- case NEON::BI__builtin_neon_vld4q_dup_v:
- Int = Intrinsic::arm_neon_vld4lane;
- break;
- }
- Function *F = CGM.getIntrinsic(Int, Ty);
- llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
-
- SmallVector<Value *, 6> Args;
- Args.push_back(Ops[1]);
- Args.append(STy->getNumElements(), UndefValue::get(Ty));
-
- llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
- Args.push_back(CI);
- Args.push_back(Align);
-
- Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
- // splat lane 0 to all elts in each vector of the result.
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Value *Val = Builder.CreateExtractValue(Ops[1], i);
- Value *Elt = Builder.CreateBitCast(Val, Ty);
- Elt = EmitNeonSplat(Elt, CI);
- Elt = Builder.CreateBitCast(Elt, Val->getType());
- Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
- }
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateStore(Ops[1], Ops[0]);
- }
-
- case NEON::BI__builtin_neon_vmul_lane_v:
- case NEON::BI__builtin_neon_vmul_laneq_v: {
- // v1f64 vmul_lane should be mapped to Neon scalar mul lane
- bool Quad = false;
- if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
- Quad = true;
- Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
- llvm::Type *VTy = GetNeonType(this,
- NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
- Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
- Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
- Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
- return Builder.CreateBitCast(Result, Ty);
- }
-
- // AArch64-only builtins
- case NEON::BI__builtin_neon_vfmaq_laneq_v: {
- Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
- return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vfmaq_lane_v: {
- Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-
- llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
- llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
- VTy->getNumElements() / 2);
- Ops[2] = Builder.CreateBitCast(Ops[2], STy);
- Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
- cast<ConstantInt>(Ops[3]));
- Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
-
- return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vfma_lane_v: {
- llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
- // v1f64 fma should be mapped to Neon scalar f64 fma
- if (VTy && VTy->getElementType() == DoubleTy) {
- Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
- llvm::Type *VTy = GetNeonType(this,
- NeonTypeFlags(NeonTypeFlags::Float64, false, false));
- Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
- Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
- Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
- Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
- return Builder.CreateBitCast(Result, Ty);
- }
- Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
- return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vfma_laneq_v: {
- llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
- // v1f64 fma should be mapped to Neon scalar f64 fma
- if (VTy && VTy->getElementType() == DoubleTy) {
- Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
- llvm::Type *VTy = GetNeonType(this,
- NeonTypeFlags(NeonTypeFlags::Float64, false, true));
- Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
- Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
- Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
- Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
- return Builder.CreateBitCast(Result, Ty);
- }
- Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-
- llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
- VTy->getNumElements() * 2);
- Ops[2] = Builder.CreateBitCast(Ops[2], STy);
- Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
- cast<ConstantInt>(Ops[3]));
- Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
-
- return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
- }
- case NEON::BI__builtin_neon_vfms_v:
- case NEON::BI__builtin_neon_vfmsq_v: {
- Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[1] = Builder.CreateFNeg(Ops[1]);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-
- // LLVM's fma intrinsic puts the accumulator in the last position, but the
- // AArch64 intrinsic has it first.
- return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
- }
- case NEON::BI__builtin_neon_vmaxnm_v:
- case NEON::BI__builtin_neon_vmaxnmq_v: {
- Int = Intrinsic::aarch64_neon_vmaxnm;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
- }
- case NEON::BI__builtin_neon_vminnm_v:
- case NEON::BI__builtin_neon_vminnmq_v: {
- Int = Intrinsic::aarch64_neon_vminnm;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
- }
- case NEON::BI__builtin_neon_vpmaxnm_v:
- case NEON::BI__builtin_neon_vpmaxnmq_v: {
- Int = Intrinsic::aarch64_neon_vpmaxnm;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
- }
- case NEON::BI__builtin_neon_vpminnm_v:
- case NEON::BI__builtin_neon_vpminnmq_v: {
- Int = Intrinsic::aarch64_neon_vpminnm;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
- }
- case NEON::BI__builtin_neon_vpmaxq_v: {
- Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
- }
- case NEON::BI__builtin_neon_vpminq_v: {
- Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
- }
- case NEON::BI__builtin_neon_vmulx_v:
- case NEON::BI__builtin_neon_vmulxq_v: {
- Int = Intrinsic::aarch64_neon_vmulx;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
- }
- case NEON::BI__builtin_neon_vsqadd_v:
- case NEON::BI__builtin_neon_vsqaddq_v: {
- Int = Intrinsic::aarch64_neon_usqadd;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
- }
- case NEON::BI__builtin_neon_vuqadd_v:
- case NEON::BI__builtin_neon_vuqaddq_v: {
- Int = Intrinsic::aarch64_neon_suqadd;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
- }
- case NEON::BI__builtin_neon_vrbit_v:
- case NEON::BI__builtin_neon_vrbitq_v:
- Int = Intrinsic::aarch64_neon_rbit;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
- case NEON::BI__builtin_neon_vcvt_f32_f64: {
- NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
- Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
- return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
- }
- case NEON::BI__builtin_neon_vcvtx_f32_v: {
- llvm::Type *EltTy = FloatTy;
- llvm::Type *ResTy = llvm::VectorType::get(EltTy, 2);
- llvm::Type *Tys[2] = { ResTy, Ty };
- Int = Intrinsic::aarch64_neon_vcvtxn;
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtx_f32_f64");
- }
- case NEON::BI__builtin_neon_vcvt_f64_f32: {
- llvm::Type *OpTy =
- GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false));
- Ops[0] = Builder.CreateBitCast(Ops[0], OpTy);
- return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
- }
- case NEON::BI__builtin_neon_vcvt_f64_v:
- case NEON::BI__builtin_neon_vcvtq_f64_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
- return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
- : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
- }
- case NEON::BI__builtin_neon_vrndn_v:
- case NEON::BI__builtin_neon_vrndnq_v: {
- Int = Intrinsic::aarch64_neon_frintn;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
- }
- case NEON::BI__builtin_neon_vrnda_v:
- case NEON::BI__builtin_neon_vrndaq_v: {
- Int = Intrinsic::round;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
- }
- case NEON::BI__builtin_neon_vrndp_v:
- case NEON::BI__builtin_neon_vrndpq_v: {
- Int = Intrinsic::ceil;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
- }
- case NEON::BI__builtin_neon_vrndm_v:
- case NEON::BI__builtin_neon_vrndmq_v: {
- Int = Intrinsic::floor;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
- }
- case NEON::BI__builtin_neon_vrndx_v:
- case NEON::BI__builtin_neon_vrndxq_v: {
- Int = Intrinsic::rint;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
- }
- case NEON::BI__builtin_neon_vrnd_v:
- case NEON::BI__builtin_neon_vrndq_v: {
- Int = Intrinsic::trunc;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd");
- }
- case NEON::BI__builtin_neon_vrndi_v:
- case NEON::BI__builtin_neon_vrndiq_v: {
- Int = Intrinsic::nearbyint;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
- }
- case NEON::BI__builtin_neon_vsqrt_v:
- case NEON::BI__builtin_neon_vsqrtq_v: {
- Int = Intrinsic::sqrt;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
- }
- case NEON::BI__builtin_neon_vceqz_v:
- case NEON::BI__builtin_neon_vceqzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
- ICmpInst::ICMP_EQ, "vceqz");
- case NEON::BI__builtin_neon_vcgez_v:
- case NEON::BI__builtin_neon_vcgezq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
- ICmpInst::ICMP_SGE, "vcgez");
- case NEON::BI__builtin_neon_vclez_v:
- case NEON::BI__builtin_neon_vclezq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
- ICmpInst::ICMP_SLE, "vclez");
- case NEON::BI__builtin_neon_vcgtz_v:
- case NEON::BI__builtin_neon_vcgtzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
- ICmpInst::ICMP_SGT, "vcgtz");
- case NEON::BI__builtin_neon_vcltz_v:
- case NEON::BI__builtin_neon_vcltzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
- ICmpInst::ICMP_SLT, "vcltz");
- }
-}
-
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
unsigned HintID = static_cast<unsigned>(-1);
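
The removed EmitAArch64BuiltinExpr above was the glue between the __builtin_neon_* builtins and the backend intrinsics; after this patch the equivalent lowering lives in the shared ARM64 path. A minimal user-level sketch of what that glue serves, assuming an AArch64 target (example code, not part of the patch):

#include <arm_neon.h>

// vrshrq_n_s32 expands to the __builtin_neon_vrshrq_n_v builtin handled by
// the "Shift by immediate" cases above, which the removed code lowered to
// the aarch64_neon_vsrshr intrinsic (rounded arithmetic shift right).
int32x4_t rounding_shift_right(int32x4_t v) {
  return vrshrq_n_s32(v, 3); // shift each lane right by 3 with rounding
}

Built with, e.g., clang++ -target aarch64-linux-gnu -O2 -c, this exercises exactly the NEON::BI__builtin_neon_vrshrq_n_v case deleted here.
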
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 944a0cf13b0..74433392e15 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2201,8 +2201,6 @@ public:
const llvm::CmpInst::Predicate Fp,
const llvm::CmpInst::Predicate Ip,
const llvm::Twine &Name = "");
- llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty);
- llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 52e41322de0..88c4d96c1d3 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -4512,221 +4512,6 @@ llvm::Value *NaClARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
}
//===----------------------------------------------------------------------===//
-// AArch64 ABI Implementation
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-class AArch64ABIInfo : public ABIInfo {
-public:
- AArch64ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
-
-private:
- // The AArch64 PCS is explicit about return types and argument types being
- // handled identically, so we don't need to draw a distinction between
- // Argument and Return classification.
- ABIArgInfo classifyGenericType(QualType Ty, int &FreeIntRegs,
- int &FreeVFPRegs) const;
-
- ABIArgInfo tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, bool IsInt,
- llvm::Type *DirectTy = nullptr) const;
-
- void computeInfo(CGFunctionInfo &FI) const override;
-
- llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const override;
-};
-
-class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
-public:
- AArch64TargetCodeGenInfo(CodeGenTypes &CGT)
- :TargetCodeGenInfo(new AArch64ABIInfo(CGT)) {}
-
- const AArch64ABIInfo &getABIInfo() const {
- return static_cast<const AArch64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
- }
-
- int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
- return 31;
- }
-
- bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
- llvm::Value *Address) const override {
- // 0-31 are x0-x30 and sp: 8 bytes each
- llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
- AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 31);
-
- // 64-95 are v0-v31: 16 bytes each
- llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
- AssignToArrayRange(CGF.Builder, Address, Sixteen8, 64, 95);
-
- return false;
- }
-
-};
-
-}
-
-void AArch64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
- int FreeIntRegs = 8, FreeVFPRegs = 8;
-
- FI.getReturnInfo() = classifyGenericType(FI.getReturnType(),
- FreeIntRegs, FreeVFPRegs);
-
- FreeIntRegs = FreeVFPRegs = 8;
- for (auto &I : FI.arguments()) {
- I.info = classifyGenericType(I.type, FreeIntRegs, FreeVFPRegs);
-
- }
-}
-
-ABIArgInfo
-AArch64ABIInfo::tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded,
- bool IsInt, llvm::Type *DirectTy) const {
- if (FreeRegs >= RegsNeeded) {
- FreeRegs -= RegsNeeded;
- return ABIArgInfo::getDirect(DirectTy);
- }
-
- llvm::Type *Padding = nullptr;
-
- // We need padding so that later arguments don't get filled in anyway. That
- // wouldn't happen if only ByVal arguments followed in the same category, but
- // a large structure will simply seem to be a pointer as far as LLVM is
- // concerned.
- if (FreeRegs > 0) {
- if (IsInt)
- Padding = llvm::Type::getInt64Ty(getVMContext());
- else
- Padding = llvm::Type::getFloatTy(getVMContext());
-
- // Either [N x i64] or [N x float].
- Padding = llvm::ArrayType::get(Padding, FreeRegs);
- FreeRegs = 0;
- }
-
- return ABIArgInfo::getIndirect(getContext().getTypeAlign(Ty) / 8,
- /*IsByVal=*/ true, /*Realign=*/ false,
- Padding);
-}
-
-
-ABIArgInfo AArch64ABIInfo::classifyGenericType(QualType Ty,
- int &FreeIntRegs,
- int &FreeVFPRegs) const {
- // Can only occur for return, but harmless otherwise.
- if (Ty->isVoidType())
- return ABIArgInfo::getIgnore();
-
- // Large vector types should be returned via memory. There's no such concept
- // in the ABI, but they'd be over 16 bytes anyway so no matter how they're
- // classified they'd go into memory (see B.3).
- if (Ty->isVectorType() && getContext().getTypeSize(Ty) > 128) {
- if (FreeIntRegs > 0)
- --FreeIntRegs;
- return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
- }
-
- // All non-aggregate LLVM types have a concrete ABI representation so they can
- // be passed directly. After this block we're guaranteed to be in a
- // complicated case.
- if (!isAggregateTypeForABI(Ty)) {
- // Treat an enum type as its underlying type.
- if (const EnumType *EnumTy = Ty->getAs<EnumType>())
- Ty = EnumTy->getDecl()->getIntegerType();
-
- if (Ty->isFloatingType() || Ty->isVectorType())
- return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ false);
-
- assert(getContext().getTypeSize(Ty) <= 128 &&
- "unexpectedly large scalar type");
-
- int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1;
-
- // If the type may need padding registers to ensure "alignment", we must be
- // careful when this is accounted for. Increasing the effective size covers
- // all cases.
- if (getContext().getTypeAlign(Ty) == 128)
- RegsNeeded += FreeIntRegs % 2 != 0;
-
- return tryUseRegs(Ty, FreeIntRegs, RegsNeeded, /*IsInt=*/ true);
- }
-
- if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
- if (FreeIntRegs > 0 && RAA == CGCXXABI::RAA_Indirect)
- --FreeIntRegs;
- return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
- }
-
- if (isEmptyRecord(getContext(), Ty, true)) {
- if (!getContext().getLangOpts().CPlusPlus) {
- // Empty structs outside C++ mode are a GNU extension, so no ABI can
- // possibly tell us what to do. It turns out (I believe) that GCC ignores
- // the object for parameter-passing purposes.
- return ABIArgInfo::getIgnore();
- }
-
- // The combination of C++98 9p5 (sizeof(struct) != 0) and the pseudocode
- // description of va_arg in the PCS require that an empty struct does
- // actually occupy space for parameter-passing. I'm hoping for a
- // clarification giving an explicit paragraph to point to in future.
- return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ true,
- llvm::Type::getInt8Ty(getVMContext()));
- }
-
- // Homogeneous vector aggregates get passed in registers or on the stack.
- const Type *Base = nullptr;
- uint64_t NumMembers = 0;
- if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers)) {
- assert(Base && "Base class should be set for homogeneous aggregate");
- // Homogeneous aggregates are passed and returned directly.
- return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ NumMembers,
- /*IsInt=*/ false);
- }
-
- uint64_t Size = getContext().getTypeSize(Ty);
- if (Size <= 128) {
- // Small structs can use the same direct type whether they're in registers
- // or on the stack.
- llvm::Type *BaseTy;
- unsigned NumBases;
- int SizeInRegs = (Size + 63) / 64;
-
- if (getContext().getTypeAlign(Ty) == 128) {
- BaseTy = llvm::Type::getIntNTy(getVMContext(), 128);
- NumBases = 1;
-
- // If the type may need padding registers to ensure "alignment", we must
- // be careful when this is accounted for. Increasing the effective size
- // covers all cases.
- SizeInRegs += FreeIntRegs % 2 != 0;
- } else {
- BaseTy = llvm::Type::getInt64Ty(getVMContext());
- NumBases = SizeInRegs;
- }
- llvm::Type *DirectTy = llvm::ArrayType::get(BaseTy, NumBases);
-
- return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ SizeInRegs,
- /*IsInt=*/ true, DirectTy);
- }
-
- // If the aggregate is > 16 bytes, it's passed and returned indirectly. In
- // LLVM terms the return uses an "sret" pointer, but that's handled elsewhere.
- --FreeIntRegs;
- return ABIArgInfo::getIndirect(0, /* byVal = */ false);
-}
-
-llvm::Value *AArch64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const {
- int FreeIntRegs = 8, FreeVFPRegs = 8;
- Ty = CGF.getContext().getCanonicalType(Ty);
- ABIArgInfo AI = classifyGenericType(Ty, FreeIntRegs, FreeVFPRegs);
-
- return EmitAArch64VAArg(VAListAddr, Ty, 8 - FreeIntRegs, 8 - FreeVFPRegs,
- AI.isIndirect(), CGF);
-}
-
-//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//
@@ -6684,6 +6469,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
case llvm::Triple::mips64el:
return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false));
+ case llvm::Triple::aarch64:
+ case llvm::Triple::aarch64_be:
case llvm::Triple::arm64:
case llvm::Triple::arm64_be: {
ARM64ABIInfo::ABIKind Kind = ARM64ABIInfo::AAPCS;
@@ -6693,10 +6480,6 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
return *(TheTargetCodeGenInfo = new ARM64TargetCodeGenInfo(Types, Kind));
}
- case llvm::Triple::aarch64:
- case llvm::Triple::aarch64_be:
- return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types));
-
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
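
The deleted AArch64ABIInfo implemented the AAPCS64 parameter-passing rules that the ARM64 ABI code now supplies: at most eight integer and eight SIMD/FP registers per call, homogeneous floating-point aggregates in vector registers, and indirect passing once an aggregate exceeds 16 bytes. A hedged illustration of the struct shapes classifyGenericType distinguished (type and function names are ours, chosen for the example):

struct HFA   { float a, b, c, d; }; // homogeneous aggregate: candidate for v0-v3
struct Small { long x, y; };        // 16 bytes: two integer registers, as [2 x i64]
struct Large { long x, y, z; };     // 24 bytes: passed indirectly (sret when returned)

// Each parameter is classified independently against the running
// FreeIntRegs/FreeVFPRegs counts seeded to 8 in computeInfo above.
long consume(HFA h, Small s, Large l);
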
diff --git a/clang/lib/Driver/Tools.cpp b/clang/lib/Driver/Tools.cpp
index 7670daa1713..4c097c5120a 100644
--- a/clang/lib/Driver/Tools.cpp
+++ b/clang/lib/Driver/Tools.cpp
@@ -444,26 +444,6 @@ void Clang::AddPreprocessingOptions(Compilation &C,
getToolChain().AddClangSystemIncludeArgs(Args, CmdArgs);
}
-/// getAArch64TargetCPU - Get the (LLVM) name of the AArch64 CPU we are targeting.
-//
-// FIXME: tblgen this.
-static std::string getAArch64TargetCPU(const ArgList &Args,
- const llvm::Triple &Triple) {
- // FIXME: Warn on inconsistent use of -mcpu and -march.
-
- // If we have -mcpu=, use that.
- if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
- StringRef MCPU = A->getValue();
- // Handle -mcpu=native.
- if (MCPU == "native")
- return llvm::sys::getHostCPUName();
- else
- return MCPU;
- }
-
- return "generic";
-}
-
// FIXME: Move to target hook.
static bool isSignedCharDefault(const llvm::Triple &Triple) {
switch (Triple.getArch()) {
@@ -1345,8 +1325,6 @@ static std::string getCPUName(const ArgList &Args, const llvm::Triple &T) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
- return getAArch64TargetCPU(Args, T);
-
case llvm::Triple::arm64:
case llvm::Triple::arm64_be:
return getARM64TargetCPU(Args);
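
The removed getAArch64TargetCPU reduced to a three-way fallback, which getARM64TargetCPU now provides for all four spellings of the triple. A stand-alone sketch of that fallback in plain C++ (hostCPUName() is a hypothetical stand-in for llvm::sys::getHostCPUName()):

#include <string>

std::string hostCPUName(); // hypothetical stand-in for llvm::sys::getHostCPUName()

// -mcpu=native resolves to the host CPU, any other -mcpu= value is taken
// verbatim, and the absence of the flag falls back to "generic".
std::string resolveAArch64CPU(const std::string *MCpu) {
  if (MCpu)
    return *MCpu == "native" ? hostCPUName() : *MCpu;
  return "generic";
}
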
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 7974b6f443e..78ba66bfb9b 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -309,16 +309,13 @@ Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
if (CheckARMBuiltinFunctionCall(BuiltinID, TheCall))
return ExprError();
break;
+ case llvm::Triple::aarch64:
+ case llvm::Triple::aarch64_be:
case llvm::Triple::arm64:
case llvm::Triple::arm64_be:
if (CheckARM64BuiltinFunctionCall(BuiltinID, TheCall))
return ExprError();
break;
- case llvm::Triple::aarch64:
- case llvm::Triple::aarch64_be:
- if (CheckAArch64BuiltinFunctionCall(BuiltinID, TheCall))
- return ExprError();
- break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
@@ -472,14 +469,6 @@ bool Sema::CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return SemaBuiltinConstantArgRange(TheCall, i, l, u + l);
}
-bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID,
- CallExpr *TheCall) {
- if (CheckNeonBuiltinFunctionCall(BuiltinID, TheCall))
- return true;
-
- return false;
-}
-
bool Sema::CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall,
unsigned MaxWidth) {
assert((BuiltinID == ARM::BI__builtin_arm_ldrex ||
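
With CheckAArch64BuiltinFunctionCall gone, aarch64 triples take the same CheckARM64BuiltinFunctionCall path, which funnels NEON builtins through the SemaBuiltinConstantArgRange check seen above. A hedged example of the class of error that still gets caught at compile time (diagnostic wording approximate):

#include <arm_neon.h>

int32x4_t bad_shift(int32x4_t v) {
  // For 32-bit lanes the shift immediate must lie in [1, 32]; Sema's
  // constant-range check rejects 33 before any IR is emitted.
  return vrshrq_n_s32(v, 33); // error: argument out of range
}
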