 clang/include/clang/Basic/arm_neon.td        |  35
 clang/lib/CodeGen/CGBuiltin.cpp              | 236
 clang/test/CodeGen/aarch64-neon-intrinsics.c | 332
 3 files changed, 568 insertions(+), 35 deletions(-)
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 088516655e9..b59843a6ea3 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -633,10 +633,39 @@ def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>;
 // Scalar Arithmetic
 
 // Scalar Addition
-
-def SCALAR_ADD : Inst<"vaddd", "ddd", "lUl", OP_ADD>;
+def SCALAR_ADD : SInst<"vadd", "sss", "SlSUl">;
+// Scalar Saturating Add
+def SCALAR_QADD : SInst<"vqadd", "sss", "ScSsSiSlSUcSUsSUiSUl">;
 // Scalar Subtraction
-def SCALAR_SUB : Inst<"vsubd", "ddd", "lUl", OP_SUB>;
+def SCALAR_SUB : SInst<"vsub", "sss", "SlSUl">;
+// Scalar Saturating Sub
+def SCALAR_QSUB : SInst<"vqsub", "sss", "ScSsSiSlSUcSUsSUiSUl">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Shift
+// Scalar Shift Left
+def SCALAR_SHL : SInst<"vshl", "sss", "SlSUl">;
+// Scalar Saturating Shift Left
+def SCALAR_QSHL : SInst<"vqshl", "sss", "ScSsSiSlSUcSUsSUiSUl">;
+// Scalar Saturating Rounding Shift Left
+def SCALAR_QRSHL : SInst<"vqrshl", "sss", "ScSsSiSlSUcSUsSUiSUl">;
+// Scalar Rounding Shift Left
+def SCALAR_RSHL : SInst<"vrshl", "sss", "SlSUl">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Reduce Pairwise Addition (Scalar and Floating Point)
+def SCALAR_ADDP : SInst<"vpadd", "sd", "SfSHlSHd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Reduce Floating Point Pairwise Max/Min
+def SCALAR_FMAXP : SInst<"vpmax", "sd", "SfSQd">;
+
+def SCALAR_FMINP : SInst<"vpmin", "sd", "SfSQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Reduce Floating Point Pairwise maxNum/minNum
+def SCALAR_FMAXNMP : SInst<"vpmaxnm", "sd", "SfSQd">;
+def SCALAR_FMINNMP : SInst<"vpminnm", "sd", "SfSQd">;
 }
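
The definitions above replace the generic OP_ADD/OP_SUB expansions for the 64-bit scalar add/sub with true scalar instruction definitions, and add saturating variants for every element width. A minimal usage sketch of the saturating forms (illustrative only; it assumes an AArch64 toolchain whose arm_neon.h provides these intrinsics):

    // Scalar saturating add/sub vs. plain wrapping add.
    // Illustrative sketch; assumes an AArch64 target with arm_neon.h.
    #include <arm_neon.h>
    #include <cstdint>
    #include <cassert>

    int main() {
      // vqaddd_s64 lowers to SQADD: the result clamps at INT64_MAX
      // instead of wrapping like an ordinary two's-complement add.
      assert(vqaddd_s64(INT64_MAX, 1) == INT64_MAX);
      // vqsubb_s8 likewise clamps at INT8_MIN (SQSUB).
      assert(vqsubb_s8(INT8_MIN, 1) == INT8_MIN);
      // The non-saturating scalar form wraps as usual (ADD on a d register).
      assert(vaddd_s64(INT64_MAX, 1) == INT64_MIN);
      return 0;
    }
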
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d1a41cfd012..8945ce3aaa2 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1605,26 +1605,28 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
 }
 
 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
-                                     NeonTypeFlags TypeFlags) {
+                                     NeonTypeFlags TypeFlags,
+                                     bool V1Ty=false) {
   int IsQuad = TypeFlags.isQuad();
   switch (TypeFlags.getEltType()) {
+  default:
+    llvm_unreachable("Invalid NeonTypeFlags element type!");
   case NeonTypeFlags::Int8:
   case NeonTypeFlags::Poly8:
-    return llvm::VectorType::get(CGF->Int8Ty, 8 << IsQuad);
+    return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
   case NeonTypeFlags::Int16:
   case NeonTypeFlags::Poly16:
   case NeonTypeFlags::Float16:
-    return llvm::VectorType::get(CGF->Int16Ty, 4 << IsQuad);
+    return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
   case NeonTypeFlags::Int32:
-    return llvm::VectorType::get(CGF->Int32Ty, 2 << IsQuad);
+    return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
   case NeonTypeFlags::Int64:
-    return llvm::VectorType::get(CGF->Int64Ty, 1 << IsQuad);
+    return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
   case NeonTypeFlags::Float32:
-    return llvm::VectorType::get(CGF->FloatTy, 2 << IsQuad);
+    return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
   case NeonTypeFlags::Float64:
-    return llvm::VectorType::get(CGF->DoubleTy, 1 << IsQuad);
+    return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
   }
-  llvm_unreachable("Invalid NeonTypeFlags element type!");
 }
 
 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
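
The new V1Ty flag lets GetNeonType produce one-element vector types: the scalar builtins return plain scalars, but the matching AArch64 intrinsics are declared on one-element vectors such as <1 x i64>, so the overload type must have exactly one lane regardless of the quad bit. A standalone sketch of the lane-count rule (illustration only, not Clang code):

    // Lane-count rule mirrored from GetNeonType above (illustrative).
    // baseLanes is the 64-bit-register lane count (8 for i8, ..., 1 for i64);
    // isQuad doubles it; v1Ty overrides both and forces a single lane.
    unsigned neonLaneCount(unsigned baseLanes, bool isQuad, bool v1Ty) {
      return v1Ty ? 1u : (baseLanes << (isQuad ? 1 : 0));
    }
    // e.g. neonLaneCount(1, /*isQuad=*/false, /*v1Ty=*/true) == 1,
    // corresponding to the <1 x i64> type used for vqaddd_s64.
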
@@ -1711,8 +1713,226 @@ CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) {
   return std::make_pair(EmitScalarExpr(Addr), Align);
 }
 
+static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
+                                           unsigned BuiltinID,
+                                           const CallExpr *E) {
+  NeonTypeFlags::EltType ET;
+  bool usgn;
+  unsigned int Int = 0;
+  bool OverloadInt = true;
+  const char *s = NULL;
+
+  SmallVector<Value *, 4> Ops;
+  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
+  }
+
+  // AArch64 scalar builtins are not overloaded; they do not have an extra
+  // argument that specifies the vector type, so each case must be handled
+  // individually.
+  switch (BuiltinID) {
+  default: break;
+  // Scalar Add
+  case AArch64::BI__builtin_neon_vaddd_s64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vaddds;
+    s = "vaddds"; usgn = false; OverloadInt = false; break;
+  case AArch64::BI__builtin_neon_vaddd_u64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vadddu;
+    s = "vadddu"; usgn = true; OverloadInt = false; break;
+  // Scalar Sub
+  case AArch64::BI__builtin_neon_vsubd_s64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vsubds;
+    s = "vsubds"; usgn = false; OverloadInt = false; break;
+  case AArch64::BI__builtin_neon_vsubd_u64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vsubdu;
+    s = "vsubdu"; usgn = true; OverloadInt = false; break;
+  // Scalar Saturating Add
+  case AArch64::BI__builtin_neon_vqaddb_s8:
+    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqadds;
+    s = "vqadds"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqaddh_s16:
+    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqadds;
+    s = "vqadds"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqadds_s32:
+    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqadds;
+    s = "vqadds"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqaddd_s64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqadds;
+    s = "vqadds"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqaddb_u8:
+    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqaddu;
+    s = "vqaddu"; usgn = true; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqaddh_u16:
+    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqaddu;
+    s = "vqaddu"; usgn = true; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqadds_u32:
+    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqaddu;
+    s = "vqaddu"; usgn = true; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqaddd_u64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqaddu;
+    s = "vqaddu"; usgn = true; OverloadInt = true; break;
+  // Scalar Saturating Sub
+  case AArch64::BI__builtin_neon_vqsubb_s8:
+    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqsubs;
+    s = "vqsubs"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqsubh_s16:
+    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqsubs;
+    s = "vqsubs"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqsubs_s32:
+    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqsubs;
+    s = "vqsubs"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqsubd_s64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqsubs;
+    s = "vqsubs"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqsubb_u8:
+    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqsubu;
+    s = "vqsubu"; usgn = true; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqsubh_u16:
+    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqsubu;
+    s = "vqsubu"; usgn = true; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqsubs_u32:
+    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqsubu;
+    s = "vqsubu"; usgn = true; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqsubd_u64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqsubu;
+    s = "vqsubu"; usgn = true; OverloadInt = true; break;
+  // Scalar Shift Left
+  case AArch64::BI__builtin_neon_vshld_s64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vshlds;
+    s = "vshlds"; usgn = false; OverloadInt = false; break;
+  case AArch64::BI__builtin_neon_vshld_u64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vshldu;
+    s = "vshldu"; usgn = true; OverloadInt = false; break;
+  // Scalar Saturating Shift Left
+  case AArch64::BI__builtin_neon_vqshlb_s8:
+    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqshls;
+    s = "vqshls"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqshlh_s16:
+    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqshls;
+    s = "vqshls"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqshls_s32:
+    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqshls;
+    s = "vqshls"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqshld_s64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqshls;
+    s = "vqshls"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqshlb_u8:
+    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqshlu;
+    s = "vqshlu"; usgn = true; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqshlh_u16:
+    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqshlu;
+    s = "vqshlu"; usgn = true; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqshls_u32:
+    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqshlu;
+    s = "vqshlu"; usgn = true; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqshld_u64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqshlu;
+    s = "vqshlu"; usgn = true; OverloadInt = true; break;
+  // Scalar Rounding Shift Left
+  case AArch64::BI__builtin_neon_vrshld_s64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vrshlds;
+    s = "vrshlds"; usgn = false; OverloadInt = false; break;
+  case AArch64::BI__builtin_neon_vrshld_u64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vrshldu;
+    s = "vrshldu"; usgn = true; OverloadInt = false; break;
+  // Scalar Saturating Rounding Shift Left
+  case AArch64::BI__builtin_neon_vqrshlb_s8:
+    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqrshls;
+    s = "vqrshls"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqrshlh_s16:
+    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqrshls;
+    s = "vqrshls"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqrshls_s32:
+    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqrshls;
+    s = "vqrshls"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqrshld_s64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqrshls;
+    s = "vqrshls"; usgn = false; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqrshlb_u8:
+    ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqrshlu;
+    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqrshlh_u16:
+    ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqrshlu;
+    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqrshls_u32:
+    ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqrshlu;
+    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
+  case AArch64::BI__builtin_neon_vqrshld_u64:
+    ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqrshlu;
+    s = "vqrshlu"; usgn = true; OverloadInt = true; break;
+  // Scalar Reduce Pairwise Add
+  case AArch64::BI__builtin_neon_vpaddd_s64:
+    Int = Intrinsic::aarch64_neon_vpadd; s = "vpadd";
+    OverloadInt = false; break;
+  case AArch64::BI__builtin_neon_vpadds_f32:
+    Int = Intrinsic::aarch64_neon_vpfadd; s = "vpfadd";
+    OverloadInt = false; break;
+  case AArch64::BI__builtin_neon_vpaddd_f64:
+    Int = Intrinsic::aarch64_neon_vpfaddq; s = "vpfaddq";
+    OverloadInt = false; break;
+  // Scalar Reduce Pairwise Floating Point Max
+  case AArch64::BI__builtin_neon_vpmaxs_f32:
+    Int = Intrinsic::aarch64_neon_vpmax; s = "vpmax";
+    OverloadInt = false; break;
+  case AArch64::BI__builtin_neon_vpmaxqd_f64:
+    Int = Intrinsic::aarch64_neon_vpmaxq; s = "vpmaxq";
+    OverloadInt = false; break;
+  // Scalar Reduce Pairwise Floating Point Min
+  case AArch64::BI__builtin_neon_vpmins_f32:
+    Int = Intrinsic::aarch64_neon_vpmin; s = "vpmin";
+    OverloadInt = false; break;
+  case AArch64::BI__builtin_neon_vpminqd_f64:
+    Int = Intrinsic::aarch64_neon_vpminq; s = "vpminq";
+    OverloadInt = false; break;
+  // Scalar Reduce Pairwise Floating Point Maxnm
+  case AArch64::BI__builtin_neon_vpmaxnms_f32:
+    Int = Intrinsic::aarch64_neon_vpfmaxnm; s = "vpfmaxnm";
+    OverloadInt = false; break;
+  case AArch64::BI__builtin_neon_vpmaxnmqd_f64:
+    Int = Intrinsic::aarch64_neon_vpfmaxnmq; s = "vpfmaxnmq";
+    OverloadInt = false; break;
+  // Scalar Reduce Pairwise Floating Point Minnm
+  case AArch64::BI__builtin_neon_vpminnms_f32:
+    Int = Intrinsic::aarch64_neon_vpfminnm; s = "vpfminnm";
+    OverloadInt = false; break;
+  case AArch64::BI__builtin_neon_vpminnmqd_f64:
+    Int = Intrinsic::aarch64_neon_vpfminnmq; s = "vpfminnmq";
+    OverloadInt = false; break;
+  }
+
+  if (!Int)
+    return 0;
+
+  // An AArch64 scalar builtin returns a scalar type but maps to an AArch64
+  // intrinsic that takes one-element vector arguments and returns a
+  // one-element vector.
+  llvm::Type *Ty = 0;
+  Function *F = 0;
+  if (OverloadInt) {
+    // Determine the type of this overloaded AArch64 intrinsic
+    NeonTypeFlags Type(ET, usgn, false);
+    llvm::VectorType *VTy = GetNeonType(&CGF, Type, true);
+    Ty = VTy;
+    if (!Ty)
+      return 0;
+    F = CGF.CGM.getIntrinsic(Int, Ty);
+  } else
+    F = CGF.CGM.getIntrinsic(Int);
+
+  Value *Result = CGF.EmitNeonCall(F, Ops, s);
+  llvm::Type *ResultType = CGF.ConvertType(E->getType());
+  // Cast the one-element vector returned by the AArch64 intrinsic back to
+  // the scalar type expected by the builtin.
+  return CGF.Builder.CreateBitCast(Result, ResultType, s);
+}
+
 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                                const CallExpr *E) {
+
+  // Process AArch64 scalar builtins
+  if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E))
+    return Result;
+
   if (BuiltinID == AArch64::BI__clear_cache) {
     assert(E->getNumArgs() == 2 &&
            "Variadic __clear_cache slipped through on AArch64");
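
Note that the pairwise cases above reduce a two-lane vector argument to a scalar result, which is why they take a vector parameter while every other case in the switch is scalar-in/scalar-out. A usage sketch of the reductions (illustrative only; assumes an AArch64 target with arm_neon.h):

    // Pairwise scalar reductions: two lanes in, one scalar out.
    #include <arm_neon.h>
    #include <cassert>

    int main() {
      float64x2_t v = {1.5, 2.5};
      // vpaddd_f64 lowers to FADDP: it adds the two lanes together.
      assert(vpaddd_f64(v) == 4.0);
      // vpmaxnms_f32 lowers to FMAXNMP, which uses IEEE maxNum semantics:
      // a quiet NaN lane is ignored in favour of the numeric lane.
      float32x2_t w = {__builtin_nanf(""), 3.0f};
      assert(vpmaxnms_f32(w) == 3.0f);
      return 0;
    }
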
diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c
index 041f6d7bf67..b71256a6650 100644
--- a/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -566,30 +566,6 @@ float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
   // CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 }
 
-uint64x1_t test_vaddd_u64(uint64x1_t v1, uint64x1_t v2) {
-  // CHECK: test_vaddd_u64
-  return vaddd_u64(v1, v2);
-  // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-}
-
-int64x1_t test_vaddd_s64(int64x1_t v1, int64x1_t v2) {
-  // CHECK: test_vaddd_s64
-  return vaddd_s64(v1, v2);
-  // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-}
-
-uint64x1_t test_vsubd_u64(uint64x1_t v1, uint64x1_t v2) {
-  // CHECK: test_vsubd_u64
-  return vsubd_u64(v1, v2);
-  // CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-}
-
-int64x1_t test_vsubd_s64(int64x1_t v1, int64x1_t v2) {
-  // CHECK: test_vsubd_s64
-  return vsubd_s64(v1, v2);
-  // CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-}
-
 int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
   // CHECK: test_vaba_s8
   return vaba_s8(v1, v2, v3);
@@ -5254,3 +5230,311 @@ poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
   return vmull_high_p8(a, b);
   // CHECK: pmull2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
 }
+
+int64_t test_vaddd_s64(int64_t a, int64_t b) {
+// CHECK: test_vaddd_s64
+  return vaddd_s64(a, b);
+// CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
+// CHECK: test_vaddd_u64
+  return vaddd_u64(a, b);
+// CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int64_t test_vsubd_s64(int64_t a, int64_t b) {
+// CHECK: test_vsubd_s64
+  return vsubd_s64(a, b);
+// CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
+// CHECK: test_vsubd_u64
+  return vsubd_u64(a, b);
+// CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int8_t test_vqaddb_s8(int8_t a, int8_t b) {
+// CHECK: test_vqaddb_s8
+  return vqaddb_s8(a, b);
+// CHECK: sqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+}
+
+int16_t test_vqaddh_s16(int16_t a, int16_t b) {
+// CHECK: test_vqaddh_s16
+  return vqaddh_s16(a, b);
+// CHECK: sqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+}
+
+int32_t test_vqadds_s32(int32_t a, int32_t b) {
+// CHECK: test_vqadds_s32
+  return vqadds_s32(a, b);
+// CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+int64_t test_vqaddd_s64(int64_t a, int64_t b) {
+// CHECK: test_vqaddd_s64
+  return vqaddd_s64(a, b);
+// CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
+// CHECK: test_vqaddb_u8
+  return vqaddb_u8(a, b);
+// CHECK: uqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+}
+
+uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
+// CHECK: test_vqaddh_u16
+  return vqaddh_u16(a, b);
+// CHECK: uqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+}
+
+uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
+// CHECK: test_vqadds_u32
+  return vqadds_u32(a, b);
+// CHECK: uqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
+// CHECK: test_vqaddd_u64
+  return vqaddd_u64(a, b);
+// CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int8_t test_vqsubb_s8(int8_t a, int8_t b) {
+// CHECK: test_vqsubb_s8
+  return vqsubb_s8(a, b);
+// CHECK: sqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+}
+
+int16_t test_vqsubh_s16(int16_t a, int16_t b) {
+// CHECK: test_vqsubh_s16
+  return vqsubh_s16(a, b);
+// CHECK: sqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+}
+
+int32_t test_vqsubs_s32(int32_t a, int32_t b) {
+// CHECK: test_vqsubs_s32
+  return vqsubs_s32(a, b);
+// CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+int64_t test_vqsubd_s64(int64_t a, int64_t b) {
+// CHECK: test_vqsubd_s64
+  return vqsubd_s64(a, b);
+// CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
+// CHECK: test_vqsubb_u8
+  return vqsubb_u8(a, b);
+// CHECK: uqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+}
+
+uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
+// CHECK: test_vqsubh_u16
+  return vqsubh_u16(a, b);
+// CHECK: uqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+}
+
+uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
+// CHECK: test_vqsubs_u32
+  return vqsubs_u32(a, b);
+// CHECK: uqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
+// CHECK: test_vqsubd_u64
+  return vqsubd_u64(a, b);
+// CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int64_t test_vshld_s64(int64_t a, int64_t b) {
+// CHECK: test_vshld_s64
+  return vshld_s64(a, b);
+// CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
+// CHECK: test_vshld_u64
+  return vshld_u64(a, b);
+// CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+// CHECK: test_vqshlb_s8
+int8_t test_vqshlb_s8(int8_t a, int8_t b) {
+  return vqshlb_s8(a, b);
+// CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+}
+
+// CHECK: test_vqshlh_s16
+int16_t test_vqshlh_s16(int16_t a, int16_t b) {
+  return vqshlh_s16(a, b);
+// CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+}
+
+// CHECK: test_vqshls_s32
+int32_t test_vqshls_s32(int32_t a, int32_t b) {
+  return vqshls_s32(a, b);
+// CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+// CHECK: test_vqshld_s64
+int64_t test_vqshld_s64(int64_t a, int64_t b) {
+  return vqshld_s64(a, b);
+// CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+// CHECK: test_vqshlb_u8
+uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
+  return vqshlb_u8(a, b);
+// CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+}
+
+// CHECK: test_vqshlh_u16
+uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
+  return vqshlh_u16(a, b);
+// CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+}
+
+// CHECK: test_vqshls_u32
+uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
+  return vqshls_u32(a, b);
+// CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+// CHECK: test_vqshld_u64
+uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
+  return vqshld_u64(a, b);
+// CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+// CHECK: test_vrshld_s64
+int64_t test_vrshld_s64(int64_t a, int64_t b) {
+  return vrshld_s64(a, b);
+// CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+// CHECK: test_vrshld_u64
+uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
+  return vrshld_u64(a, b);
+// CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+// CHECK: test_vqrshlb_s8
+int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
+  return vqrshlb_s8(a, b);
+// CHECK: sqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+}
+
+// CHECK: test_vqrshlh_s16
+int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
+  return vqrshlh_s16(a, b);
+// CHECK: sqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+}
+
+// CHECK: test_vqrshls_s32
+int32_t test_vqrshls_s32(int32_t a, int32_t b) {
+  return vqrshls_s32(a, b);
+// CHECK: sqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+// CHECK: test_vqrshld_s64
+int64_t test_vqrshld_s64(int64_t a, int64_t b) {
+  return vqrshld_s64(a, b);
+// CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+// CHECK: test_vqrshlb_u8
+uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
+  return vqrshlb_u8(a, b);
+// CHECK: uqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+}
+
+// CHECK: test_vqrshlh_u16
+uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
+  return vqrshlh_u16(a, b);
+// CHECK: uqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+}
+
+// CHECK: test_vqrshls_u32
+uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
+  return vqrshls_u32(a, b);
+// CHECK: uqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+}
+
+// CHECK: test_vqrshld_u64
+uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
+  return vqrshld_u64(a, b);
+// CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+// CHECK: test_vpaddd_s64
+int64_t test_vpaddd_s64(int64x2_t a) {
+  return vpaddd_s64(a);
+// CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
+}
+
+// CHECK: test_vpadds_f32
+float32_t test_vpadds_f32(float32x2_t a) {
+  return vpadds_f32(a);
+// CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s
+}
+
+// CHECK: test_vpaddd_f64
+float64_t test_vpaddd_f64(float64x2_t a) {
+  return vpaddd_f64(a);
+// CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d
+}
+
+// CHECK: test_vpmaxnms_f32
+float32_t test_vpmaxnms_f32(float32x2_t a) {
+  return vpmaxnms_f32(a);
+// CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
+}
+
+// CHECK: test_vpmaxnmqd_f64
+float64_t test_vpmaxnmqd_f64(float64x2_t a) {
+  return vpmaxnmqd_f64(a);
+// CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
+}
+
+// CHECK: test_vpmaxs_f32
+float32_t test_vpmaxs_f32(float32x2_t a) {
+  return vpmaxs_f32(a);
+// CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s
+}
+
+// CHECK: test_vpmaxqd_f64
+float64_t test_vpmaxqd_f64(float64x2_t a) {
+  return vpmaxqd_f64(a);
+// CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d
+}
+
+// CHECK: test_vpminnms_f32
+float32_t test_vpminnms_f32(float32x2_t a) {
+  return vpminnms_f32(a);
+// CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
+}
+
+// CHECK: test_vpminnmqd_f64
+float64_t test_vpminnmqd_f64(float64x2_t a) {
+  return vpminnmqd_f64(a);
+// CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
+}
+
+// CHECK: test_vpmins_f32
+float32_t test_vpmins_f32(float32x2_t a) {
+  return vpmins_f32(a);
+// CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s
+}
+
+// CHECK: test_vpminqd_f64
+float64_t test_vpminqd_f64(float64x2_t a) {
+  return vpminqd_f64(a);
+// CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d
+}
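
One detail worth remembering about the scalar shift tests above: as with the vector forms, the shift count is signed, so a negative count shifts right, and the 'r' variants round that right shift instead of truncating it. A usage sketch (illustrative only; assumes an AArch64 target with arm_neon.h):

    // Signed shift counts: negative counts shift right; 'r' variants round.
    #include <arm_neon.h>
    #include <cstdint>
    #include <cassert>

    int main() {
      assert(vshld_s64(1, 3) == 8);    // SSHL, positive count: left shift
      assert(vshld_s64(7, -1) == 3);   // negative count: truncating right shift
      assert(vrshld_s64(7, -1) == 4);  // SRSHL rounds the right shift to nearest
      return 0;
    }
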