| -rw-r--r-- | clang/include/clang/Basic/arm_neon.td | 40 |
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 16 |
| -rw-r--r-- | clang/test/CodeGen/aarch64-neon-intrinsics.c | 968 |
| -rw-r--r-- | clang/utils/TableGen/NeonEmitter.cpp | 190 |
4 files changed, 1199 insertions, 15 deletions
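This patch wires up the AArch64 "3VDiff" high-half intrinsics (vaddl_high, vaddw_high, vsubl_high, vsubw_high, vabdl_high, vabal_high, vmull_high, vmlal_high, vmlsl_high, vaddhn_high, vraddhn_high, vsubhn_high, vrsubhn_high, vqdmull_high, vqdmlal_high, vqdmlsl_high), which consume the upper 64 bits of 128-bit operands and are expected to select the "2"-suffixed instructions (saddl2, addhn2, sqdmull2, and so on). A minimal usage sketch, not part of the patch itself; the function names are illustrative, and the expected instruction for each intrinsic follows the CHECK lines added to the test file below:

#include <arm_neon.h>

/* Without the *_high form, the upper half has to be extracted explicitly. */
int32x4_t widen_add_top_old(int16x8_t a, int16x8_t b) {
  return vaddl_s16(vget_high_s16(a), vget_high_s16(b));
}

/* The new intrinsic expresses the same operation directly and is expected
   to select a single saddl2 (per the new CHECK lines). */
int32x4_t widen_add_top_new(int16x8_t a, int16x8_t b) {
  return vaddl_high_s16(a, b);   /* saddl2 v.4s, v.8h, v.8h */
}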
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 1ac5f9b7168..ad10abb1c2b 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -18,15 +18,22 @@ def OP_NONE : Op; def OP_UNAVAILABLE : Op; def OP_ADD : Op; def OP_ADDL : Op; +def OP_ADDLHi : Op; def OP_ADDW : Op; +def OP_ADDWHi : Op; def OP_SUB : Op; def OP_SUBL : Op; +def OP_SUBLHi : Op; def OP_SUBW : Op; +def OP_SUBWHi : Op; def OP_MUL : Op; def OP_MLA : Op; def OP_MLAL : Op; +def OP_MULLHi : Op; +def OP_MLALHi : Op; def OP_MLS : Op; def OP_MLSL : Op; +def OP_MLSLHi : Op; def OP_MUL_N : Op; def OP_MLA_N : Op; def OP_MLS_N : Op; @@ -66,9 +73,18 @@ def OP_REV64 : Op; def OP_REV32 : Op; def OP_REV16 : Op; def OP_REINT : Op; +def OP_ADDHNHi : Op; +def OP_RADDHNHi : Op; +def OP_SUBHNHi : Op; +def OP_RSUBHNHi : Op; def OP_ABDL : Op; +def OP_ABDLHi : Op; def OP_ABA : Op; def OP_ABAL : Op; +def OP_ABALHi : Op; +def OP_QDMULLHi : Op; +def OP_QDMLALHi : Op; +def OP_QDMLSLHi : Op; def OP_DIV : Op; def OP_LONG_HI : Op; def OP_NARROW_HI : Op; @@ -133,6 +149,7 @@ class NoTestOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {} // w: double width elements, same num elts // n: double width elements, half num elts // h: half width elements, double num elts +// q: half width elements, quad num elts // e: half width elements, double num elts, unsigned // m: half width elements, same num elts // i: constant int @@ -590,6 +607,29 @@ def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "udi", "Qd">; } //////////////////////////////////////////////////////////////////////////////// +// 3VDiff class using high 64-bit in operands +def VADDL_HIGH : SOpInst<"vaddl_high", "wkk", "csiUcUsUi", OP_ADDLHi>; +def VADDW_HIGH : SOpInst<"vaddw_high", "wwk", "csiUcUsUi", OP_ADDWHi>; +def VSUBL_HIGH : SOpInst<"vsubl_high", "wkk", "csiUcUsUi", OP_SUBLHi>; +def VSUBW_HIGH : SOpInst<"vsubw_high", "wwk", "csiUcUsUi", OP_SUBWHi>; + +def VABDL_HIGH : SOpInst<"vabdl_high", "wkk", "csiUcUsUi", OP_ABDLHi>; +def VABAL_HIGH : SOpInst<"vabal_high", "wwkk", "csiUcUsUi", OP_ABALHi>; + +def VMULL_HIGH : SOpInst<"vmull_high", "wkk", "csiUcUsUiPc", OP_MULLHi>; +def VMLAL_HIGH : SOpInst<"vmlal_high", "wwkk", "csiUcUsUi", OP_MLALHi>; +def VMLSL_HIGH : SOpInst<"vmlsl_high", "wwkk", "csiUcUsUi", OP_MLSLHi>; + +def VADDHN_HIGH : SOpInst<"vaddhn_high", "qhkk", "silUsUiUl", OP_ADDHNHi>; +def VRADDHN_HIGH : SOpInst<"vraddhn_high", "qhkk", "silUsUiUl", OP_RADDHNHi>; +def VSUBHN_HIGH : SOpInst<"vsubhn_high", "qhkk", "silUsUiUl", OP_SUBHNHi>; +def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "qhkk", "silUsUiUl", OP_RSUBHNHi>; + +def VQDMULL_HIGH : SOpInst<"vqdmull_high", "wkk", "si", OP_QDMULLHi>; +def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "wwkk", "si", OP_QDMLALHi>; +def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>; + +//////////////////////////////////////////////////////////////////////////////// // Scalar Arithmetic // Scalar Addition diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 6bf5d6f54af..5b5b39f5e34 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1840,6 +1840,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E); case AArch64::BI__builtin_neon_vqrshlq_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E); + case AArch64::BI__builtin_neon_vaddhn_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vaddhn_v, E); + case 
AArch64::BI__builtin_neon_vraddhn_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vraddhn_v, E); + case AArch64::BI__builtin_neon_vsubhn_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsubhn_v, E); + case AArch64::BI__builtin_neon_vrsubhn_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsubhn_v, E); + case AArch64::BI__builtin_neon_vmull_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmull_v, E); + case AArch64::BI__builtin_neon_vqdmull_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmull_v, E); + case AArch64::BI__builtin_neon_vqdmlal_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlal_v, E); + case AArch64::BI__builtin_neon_vqdmlsl_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlsl_v, E); case AArch64::BI__builtin_neon_vmax_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E); case AArch64::BI__builtin_neon_vmaxq_v: diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c index 9dce70d50a9..53ec130184c 100644 --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -4274,3 +4274,971 @@ uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) { return vcvtq_n_u64_f64(a, 50); // CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 } + +int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vaddl_s8 + return vaddl_s8(a, b); + // CHECK: saddl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} + +int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vaddl_s16 + return vaddl_s16(a, b); + // CHECK: saddl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} + +int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vaddl_s32 + return vaddl_s32(a, b); + // CHECK: saddl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vaddl_u8 + return vaddl_u8(a, b); + // CHECK: uaddl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} + +uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vaddl_u16 + return vaddl_u16(a, b); + // CHECK: uaddl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} + +uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vaddl_u32 + return vaddl_u32(a, b); + // CHECK: uaddl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vaddl_high_s8 + return vaddl_high_s8(a, b); + // CHECK: saddl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} + +int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vaddl_high_s16 + return vaddl_high_s16(a, b); + // CHECK: saddl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vaddl_high_s32 + return vaddl_high_s32(a, b); + // CHECK: saddl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vaddl_high_u8 + return vaddl_high_u8(a, b); + // CHECK: uaddl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} + +uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vaddl_high_u16 + return vaddl_high_u16(a, b); + // CHECK: uaddl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vaddl_high_u32 + return vaddl_high_u32(a, b); + // CHECK: uaddl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} 
+ +int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) { + // CHECK: test_vaddw_s8 + return vaddw_s8(a, b); + // CHECK: saddw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b +} + +int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) { + // CHECK: test_vaddw_s16 + return vaddw_s16(a, b); + // CHECK: saddw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h +} + +int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) { + // CHECK: test_vaddw_s32 + return vaddw_s32(a, b); + // CHECK: saddw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s +} + +uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) { + // CHECK: test_vaddw_u8 + return vaddw_u8(a, b); + // CHECK: uaddw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b +} + +uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) { + // CHECK: test_vaddw_u16 + return vaddw_u16(a, b); + // CHECK: uaddw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h +} + +uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) { + // CHECK: test_vaddw_u32 + return vaddw_u32(a, b); + // CHECK: uaddw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s +} + +int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) { + // CHECK: test_vaddw_high_s8 + return vaddw_high_s8(a, b); + // CHECK: saddw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b +} + +int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) { + // CHECK: test_vaddw_high_s16 + return vaddw_high_s16(a, b); + // CHECK: saddw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h +} + +int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) { + // CHECK: test_vaddw_high_s32 + return vaddw_high_s32(a, b); + // CHECK: saddw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s +} + +uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) { + // CHECK: test_vaddw_high_u8 + return vaddw_high_u8(a, b); + // CHECK: uaddw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b +} + +uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) { + // CHECK: test_vaddw_high_u16 + return vaddw_high_u16(a, b); + // CHECK: uaddw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h +} + +uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) { + // CHECK: test_vaddw_high_u32 + return vaddw_high_u32(a, b); + // CHECK: uaddw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s +} + +int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vsubl_s8 + return vsubl_s8(a, b); + // CHECK: ssubl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} + +int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vsubl_s16 + return vsubl_s16(a, b); + // CHECK: ssubl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} + +int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vsubl_s32 + return vsubl_s32(a, b); + // CHECK: ssubl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vsubl_u8 + return vsubl_u8(a, b); + // CHECK: usubl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} + +uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vsubl_u16 + return vsubl_u16(a, b); + // CHECK: usubl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} + +uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vsubl_u32 + return vsubl_u32(a, b); + // CHECK: usubl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vsubl_high_s8 + return vsubl_high_s8(a, b); + // CHECK: ssubl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} + +int32x4_t 
test_vsubl_high_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsubl_high_s16 + return vsubl_high_s16(a, b); + // CHECK: ssubl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vsubl_high_s32 + return vsubl_high_s32(a, b); + // CHECK: ssubl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vsubl_high_u8 + return vsubl_high_u8(a, b); + // CHECK: usubl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} + +uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vsubl_high_u16 + return vsubl_high_u16(a, b); + // CHECK: usubl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vsubl_high_u32 + return vsubl_high_u32(a, b); + // CHECK: usubl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) { + // CHECK: test_vsubw_s8 + return vsubw_s8(a, b); + // CHECK: ssubw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b +} + +int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) { + // CHECK: test_vsubw_s16 + return vsubw_s16(a, b); + // CHECK: ssubw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h +} + +int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) { + // CHECK: test_vsubw_s32 + return vsubw_s32(a, b); + // CHECK: ssubw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s +} + +uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) { + // CHECK: test_vsubw_u8 + return vsubw_u8(a, b); + // CHECK: usubw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b +} + +uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) { + // CHECK: test_vsubw_u16 + return vsubw_u16(a, b); + // CHECK: usubw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h +} + +uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) { + // CHECK: test_vsubw_u32 + return vsubw_u32(a, b); + // CHECK: usubw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s +} + +int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) { + // CHECK: test_vsubw_high_s8 + return vsubw_high_s8(a, b); + // CHECK: ssubw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b +} + +int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) { + // CHECK: test_vsubw_high_s16 + return vsubw_high_s16(a, b); + // CHECK: ssubw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h +} + +int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) { + // CHECK: test_vsubw_high_s32 + return vsubw_high_s32(a, b); + // CHECK: ssubw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s +} + +uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) { + // CHECK: test_vsubw_high_u8 + return vsubw_high_u8(a, b); + // CHECK: usubw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b +} + +uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) { + // CHECK: test_vsubw_high_u16 + return vsubw_high_u16(a, b); + // CHECK: usubw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h +} + +uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) { + // CHECK: test_vsubw_high_u32 + return vsubw_high_u32(a, b); + // CHECK: usubw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s +} + +int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vaddhn_s16 + return vaddhn_s16(a, b); + // CHECK: addhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vaddhn_s32 + return vaddhn_s32(a, b); + // CHECK: addhn 
{{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vaddhn_s64 + return vaddhn_s64(a, b); + // CHECK: addhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vaddhn_u16 + return vaddhn_u16(a, b); + // CHECK: addhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vaddhn_u32 + return vaddhn_u32(a, b); + // CHECK: addhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vaddhn_u64 + return vaddhn_u64(a, b); + // CHECK: addhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { + // CHECK: test_vaddhn_high_s16 + return vaddhn_high_s16(r, a, b); + // CHECK: addhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { + // CHECK: test_vaddhn_high_s32 + return vaddhn_high_s32(r, a, b); + // CHECK: addhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { + // CHECK: test_vaddhn_high_s64 + return vaddhn_high_s64(r, a, b); + // CHECK: addhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { + // CHECK: test_vaddhn_high_u16 + return vaddhn_high_u16(r, a, b); + // CHECK: addhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { + // CHECK: test_vaddhn_high_u32 + return vaddhn_high_u32(r, a, b); + // CHECK: addhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { + // CHECK: test_vaddhn_high_u64 + return vaddhn_high_u64(r, a, b); + // CHECK: addhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vraddhn_s16 + return vraddhn_s16(a, b); + // CHECK: raddhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vraddhn_s32 + return vraddhn_s32(a, b); + // CHECK: raddhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vraddhn_s64 + return vraddhn_s64(a, b); + // CHECK: raddhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vraddhn_u16 + return vraddhn_u16(a, b); + // CHECK: raddhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vraddhn_u32 + return vraddhn_u32(a, b); + // CHECK: raddhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vraddhn_u64 + return vraddhn_u64(a, b); + // CHECK: raddhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { + // CHECK: test_vraddhn_high_s16 + return vraddhn_high_s16(r, a, b); + // CHECK: raddhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { + // CHECK: test_vraddhn_high_s32 + 
return vraddhn_high_s32(r, a, b); + // CHECK: raddhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { + // CHECK: test_vraddhn_high_s64 + return vraddhn_high_s64(r, a, b); + // CHECK: raddhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { + // CHECK: test_vraddhn_high_u16 + return vraddhn_high_u16(r, a, b); + // CHECK: raddhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { + // CHECK: test_vraddhn_high_u32 + return vraddhn_high_u32(r, a, b); + // CHECK: raddhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { + // CHECK: test_vraddhn_high_u64 + return vraddhn_high_u64(r, a, b); + // CHECK: raddhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsubhn_s16 + return vsubhn_s16(a, b); + // CHECK: subhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vsubhn_s32 + return vsubhn_s32(a, b); + // CHECK: subhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vsubhn_s64 + return vsubhn_s64(a, b); + // CHECK: subhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vsubhn_u16 + return vsubhn_u16(a, b); + // CHECK: subhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vsubhn_u32 + return vsubhn_u32(a, b); + // CHECK: subhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vsubhn_u64 + return vsubhn_u64(a, b); + // CHECK: subhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { + // CHECK: test_vsubhn_high_s16 + return vsubhn_high_s16(r, a, b); + // CHECK: subhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { + // CHECK: test_vsubhn_high_s32 + return vsubhn_high_s32(r, a, b); + // CHECK: subhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { + // CHECK: test_vsubhn_high_s64 + return vsubhn_high_s64(r, a, b); + // CHECK: subhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { + // CHECK: test_vsubhn_high_u16 + return vsubhn_high_u16(r, a, b); + // CHECK: subhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { + // CHECK: test_vsubhn_high_u32 + return vsubhn_high_u32(r, a, b); + // CHECK: subhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { + // CHECK: test_vsubhn_high_u64 + return vsubhn_high_u64(r, a, b); + // CHECK: subhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vrsubhn_s16 + return vrsubhn_s16(a, b); + // CHECK: rsubhn {{v[0-31]+}}.8b, 
{{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vrsubhn_s32 + return vrsubhn_s32(a, b); + // CHECK: rsubhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vrsubhn_s64 + return vrsubhn_s64(a, b); + // CHECK: rsubhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vrsubhn_u16 + return vrsubhn_u16(a, b); + // CHECK: rsubhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vrsubhn_u32 + return vrsubhn_u32(a, b); + // CHECK: rsubhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vrsubhn_u64 + return vrsubhn_u64(a, b); + // CHECK: rsubhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { + // CHECK: test_vrsubhn_high_s16 + return vrsubhn_high_s16(r, a, b); + // CHECK: rsubhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { + // CHECK: test_vrsubhn_high_s32 + return vrsubhn_high_s32(r, a, b); + // CHECK: rsubhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { + // CHECK: test_vrsubhn_high_s64 + return vrsubhn_high_s64(r, a, b); + // CHECK: rsubhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { + // CHECK: test_vrsubhn_high_u16 + return vrsubhn_high_u16(r, a, b); + // CHECK: rsubhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { + // CHECK: test_vrsubhn_high_u32 + return vrsubhn_high_u32(r, a, b); + // CHECK: rsubhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { + // CHECK: test_vrsubhn_high_u64 + return vrsubhn_high_u64(r, a, b); + // CHECK: rsubhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vabdl_s8 + return vabdl_s8(a, b); + // CHECK: sabdl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vabdl_s16 + return vabdl_s16(a, b); + // CHECK: sabdl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vabdl_s32 + return vabdl_s32(a, b); + // CHECK: sabdl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} +uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vabdl_u8 + return vabdl_u8(a, b); + // CHECK: uabdl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vabdl_u16 + return vabdl_u16(a, b); + // CHECK: uabdl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vabdl_u32 + return vabdl_u32(a, b); + // CHECK: uabdl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { + // CHECK: test_vabal_s8 + return vabal_s8(a, b, c); + // CHECK: sabal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, 
{{v[0-31]+}}.8b +} +int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + // CHECK: test_vabal_s16 + return vabal_s16(a, b, c); + // CHECK: sabal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + // CHECK: test_vabal_s32 + return vabal_s32(a, b, c); + // CHECK: sabal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} +uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { + // CHECK: test_vabal_u8 + return vabal_u8(a, b, c); + // CHECK: uabal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { + // CHECK: test_vabal_u16 + return vabal_u16(a, b, c); + // CHECK: uabal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { + // CHECK: test_vabal_u32 + return vabal_u32(a, b, c); + // CHECK: uabal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vabdl_high_s8 + return vabdl_high_s8(a, b); + // CHECK: sabdl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vabdl_high_s16 + return vabdl_high_s16(a, b); + // CHECK: sabdl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vabdl_high_s32 + return vabdl_high_s32(a, b); + // CHECK: sabdl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} +uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vabdl_high_u8 + return vabdl_high_u8(a, b); + // CHECK: uabdl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vabdl_high_u16 + return vabdl_high_u16(a, b); + // CHECK: uabdl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vabdl_high_u32 + return vabdl_high_u32(a, b); + // CHECK: uabdl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { + // CHECK: test_vabal_high_s8 + return vabal_high_s8(a, b, c); + // CHECK: sabal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { + // CHECK: test_vabal_high_s16 + return vabal_high_s16(a, b, c); + // CHECK: sabal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { + // CHECK: test_vabal_high_s32 + return vabal_high_s32(a, b, c); + // CHECK: sabal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} +uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { + // CHECK: test_vabal_high_u8 + return vabal_high_u8(a, b, c); + // CHECK: uabal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { + // CHECK: test_vabal_high_u16 + return vabal_high_u16(a, b, c); + // CHECK: uabal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { + // CHECK: test_vabal_high_u32 + return vabal_high_u32(a, b, c); + // CHECK: uabal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vmull_s8 + return vmull_s8(a, b); + // 
CHECK: smull {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vmull_s16 + return vmull_s16(a, b); + // CHECK: smull {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vmull_s32 + return vmull_s32(a, b); + // CHECK: smull {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} +uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vmull_u8 + return vmull_u8(a, b); + // CHECK: umull {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vmull_u16 + return vmull_u16(a, b); + // CHECK: umull {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vmull_u32 + return vmull_u32(a, b); + // CHECK: umull {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vmull_high_s8 + return vmull_high_s8(a, b); + // CHECK: smull2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vmull_high_s16 + return vmull_high_s16(a, b); + // CHECK: smull2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vmull_high_s32 + return vmull_high_s32(a, b); + // CHECK: smull2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} +uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vmull_high_u8 + return vmull_high_u8(a, b); + // CHECK: umull2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vmull_high_u16 + return vmull_high_u16(a, b); + // CHECK: umull2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vmull_high_u32 + return vmull_high_u32(a, b); + // CHECK: umull2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { + // CHECK: test_vmlal_s8 + return vmlal_s8(a, b, c); + // CHECK: smlal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + // CHECK: test_vmlal_s16 + return vmlal_s16(a, b, c); + // CHECK: smlal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + // CHECK: test_vmlal_s32 + return vmlal_s32(a, b, c); + // CHECK: smlal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} +uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { + // CHECK: test_vmlal_u8 + return vmlal_u8(a, b, c); + // CHECK: umlal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { + // CHECK: test_vmlal_u16 + return vmlal_u16(a, b, c); + // CHECK: umlal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { + // CHECK: test_vmlal_u32 + return vmlal_u32(a, b, c); + // CHECK: umlal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { + // CHECK: test_vmlal_high_s8 + return vmlal_high_s8(a, b, c); + // CHECK: smlal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +int32x4_t test_vmlal_high_s16(int32x4_t 
a, int16x8_t b, int16x8_t c) { + // CHECK: test_vmlal_high_s16 + return vmlal_high_s16(a, b, c); + // CHECK: smlal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { + // CHECK: test_vmlal_high_s32 + return vmlal_high_s32(a, b, c); + // CHECK: smlal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} +uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { + // CHECK: test_vmlal_high_u8 + return vmlal_high_u8(a, b, c); + // CHECK: umlal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { + // CHECK: test_vmlal_high_u16 + return vmlal_high_u16(a, b, c); + // CHECK: umlal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { + // CHECK: test_vmlal_high_u32 + return vmlal_high_u32(a, b, c); + // CHECK: umlal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) { + // CHECK: test_vmlsl_s8 + return vmlsl_s8(a, b, c); + // CHECK: smlsl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + // CHECK: test_vmlsl_s16 + return vmlsl_s16(a, b, c); + // CHECK: smlsl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + // CHECK: test_vmlsl_s32 + return vmlsl_s32(a, b, c); + // CHECK: smlsl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} +uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { + // CHECK: test_vmlsl_u8 + return vmlsl_u8(a, b, c); + // CHECK: umlsl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { + // CHECK: test_vmlsl_u16 + return vmlsl_u16(a, b, c); + // CHECK: umlsl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { + // CHECK: test_vmlsl_u32 + return vmlsl_u32(a, b, c); + // CHECK: umlsl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { + // CHECK: test_vmlsl_high_s8 + return vmlsl_high_s8(a, b, c); + // CHECK: smlsl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { + // CHECK: test_vmlsl_high_s16 + return vmlsl_high_s16(a, b, c); + // CHECK: smlsl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { + // CHECK: test_vmlsl_high_s32 + return vmlsl_high_s32(a, b, c); + // CHECK: smlsl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} +uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { + // CHECK: test_vmlsl_high_u8 + return vmlsl_high_u8(a, b, c); + // CHECK: umlsl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { + // CHECK: test_vmlsl_high_u16 + return vmlsl_high_u16(a, b, c); + // CHECK: umlsl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { + // CHECK: test_vmlsl_high_u32 + return vmlsl_high_u32(a, b, c); + // CHECK: umlsl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vqdmull_s16 + 
return vqdmull_s16(a, b); + // CHECK: sqdmull {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vqdmull_s32 + return vqdmull_s32(a, b); + // CHECK: sqdmull {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + // CHECK: test_vqdmlal_s16 + return vqdmlal_s16(a, b, c); + // CHECK: sqdmlal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} + +int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + // CHECK: test_vqdmlal_s32 + return vqdmlal_s32(a, b, c); + // CHECK: sqdmlal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + // CHECK: test_vqdmlsl_s16 + return vqdmlsl_s16(a, b, c); + // CHECK: sqdmlsl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} + +int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + // CHECK: test_vqdmlsl_s32 + return vqdmlsl_s32(a, b, c); + // CHECK: sqdmlsl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vqdmull_high_s16 + return vqdmull_high_s16(a, b); + // CHECK: sqdmull2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vqdmull_high_s32 + return vqdmull_high_s32(a, b); + // CHECK: sqdmull2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { + // CHECK: test_vqdmlal_high_s16 + return vqdmlal_high_s16(a, b, c); + // CHECK: sqdmlal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { + // CHECK: test_vqdmlal_high_s32 + return vqdmlal_high_s32(a, b, c); + // CHECK: sqdmlal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { + // CHECK: test_vqdmlsl_high_s16 + return vqdmlsl_high_s16(a, b, c); + // CHECK: sqdmlsl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { + // CHECK: test_vqdmlsl_high_s32 + return vqdmlsl_high_s32(a, b, c); + // CHECK: sqdmlsl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vmull_p8 + return vmull_p8(a, b); + // CHECK: pmull {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} + +poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vmull_high_p8 + return vmull_high_p8(a, b); + // CHECK: pmull2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index d8f203d3df4..f700c6753a6 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -40,15 +40,22 @@ enum OpKind { OpUnavailable, OpAdd, OpAddl, + OpAddlHi, OpAddw, + OpAddwHi, OpSub, OpSubl, + OpSublHi, OpSubw, + OpSubwHi, OpMul, OpMla, OpMlal, + OpMullHi, + OpMlalHi, OpMls, OpMlsl, + OpMlslHi, OpMulN, OpMlaN, OpMlsN, @@ -88,9 +95,18 @@ enum OpKind { OpRev32, OpRev64, OpReinterpret, + OpAddhnHi, + OpRAddhnHi, + OpSubhnHi, + OpRSubhnHi, OpAbdl, + OpAbdlHi, OpAba, OpAbal, + OpAbalHi, + OpQDMullHi, + OpQDMlalHi, + OpQDMlslHi, OpDiv, OpLongHi, OpNarrowHi, @@ -159,15 +175,22 @@ public: OpMap["OP_UNAVAILABLE"] = OpUnavailable; OpMap["OP_ADD"] = OpAdd; 
OpMap["OP_ADDL"] = OpAddl; + OpMap["OP_ADDLHi"] = OpAddlHi; OpMap["OP_ADDW"] = OpAddw; + OpMap["OP_ADDWHi"] = OpAddwHi; OpMap["OP_SUB"] = OpSub; OpMap["OP_SUBL"] = OpSubl; + OpMap["OP_SUBLHi"] = OpSublHi; OpMap["OP_SUBW"] = OpSubw; + OpMap["OP_SUBWHi"] = OpSubwHi; OpMap["OP_MUL"] = OpMul; OpMap["OP_MLA"] = OpMla; OpMap["OP_MLAL"] = OpMlal; + OpMap["OP_MULLHi"] = OpMullHi; + OpMap["OP_MLALHi"] = OpMlalHi; OpMap["OP_MLS"] = OpMls; OpMap["OP_MLSL"] = OpMlsl; + OpMap["OP_MLSLHi"] = OpMlslHi; OpMap["OP_MUL_N"] = OpMulN; OpMap["OP_MLA_N"] = OpMlaN; OpMap["OP_MLS_N"] = OpMlsN; @@ -207,9 +230,18 @@ public: OpMap["OP_REV32"] = OpRev32; OpMap["OP_REV64"] = OpRev64; OpMap["OP_REINT"] = OpReinterpret; + OpMap["OP_ADDHNHi"] = OpAddhnHi; + OpMap["OP_RADDHNHi"] = OpRAddhnHi; + OpMap["OP_SUBHNHi"] = OpSubhnHi; + OpMap["OP_RSUBHNHi"] = OpRSubhnHi; OpMap["OP_ABDL"] = OpAbdl; + OpMap["OP_ABDLHi"] = OpAbdlHi; OpMap["OP_ABA"] = OpAba; OpMap["OP_ABAL"] = OpAbal; + OpMap["OP_ABALHi"] = OpAbalHi; + OpMap["OP_QDMULLHi"] = OpQDMullHi; + OpMap["OP_QDMLALHi"] = OpQDMlalHi; + OpMap["OP_QDMLSLHi"] = OpQDMlslHi; OpMap["OP_DIV"] = OpDiv; OpMap["OP_LONG_HI"] = OpLongHi; OpMap["OP_NARROW_HI"] = OpNarrowHi; @@ -326,6 +358,29 @@ static char Narrow(const char t) { } } +static std::string GetNarrowTypestr(StringRef ty) +{ + std::string s; + for (size_t i = 0, end = ty.size(); i < end; i++) { + switch (ty[i]) { + case 's': + s += 'c'; + break; + case 'i': + s += 's'; + break; + case 'l': + s += 'i'; + break; + default: + s += ty[i]; + break; + } + } + + return s; +} + /// For a particular StringRef, return the base type code, and whether it has /// the quad-vector, polynomial, or unsigned modifiers set. static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) { @@ -426,6 +481,10 @@ static char ModType(const char mod, char type, bool &quad, bool &poly, if (type == 'h') quad = false; break; + case 'q': + type = Narrow(type); + quad = true; + break; case 'e': type = Narrow(type); usgn = true; @@ -1286,13 +1345,60 @@ static std::string GenMacroLocals(const std::string &proto, StringRef typestr) { } // Use the vmovl builtin to sign-extend or zero-extend a vector. -static std::string Extend(StringRef typestr, const std::string &a) { +static std::string Extend(StringRef typestr, const std::string &a, bool h=0) { + std::string s, high; + high = h ? "_high" : ""; + s = MangleName("vmovl" + high, typestr, ClassS); + s += "(" + a + ")"; + return s; +} + +// Get the high 64-bit part of a vector +static std::string GetHigh(const std::string &a, StringRef typestr) { std::string s; - s = MangleName("vmovl", typestr, ClassS); + s = MangleName("vget_high", typestr, ClassS); s += "(" + a + ")"; return s; } +// Gen operation with two operands and get high 64-bit for both of two operands. +static std::string Gen2OpWith2High(StringRef typestr, + const std::string &op, + const std::string &a, + const std::string &b) { + std::string s; + std::string Op1 = GetHigh(a, typestr); + std::string Op2 = GetHigh(b, typestr); + s = MangleName(op, typestr, ClassS); + s += "(" + Op1 + ", " + Op2 + ");"; + return s; +} + +// Gen operation with three operands and get high 64-bit of the latter +// two operands. 
+static std::string Gen3OpWith2High(StringRef typestr, + const std::string &op, + const std::string &a, + const std::string &b, + const std::string &c) { + std::string s; + std::string Op1 = GetHigh(b, typestr); + std::string Op2 = GetHigh(c, typestr); + s = MangleName(op, typestr, ClassS); + s += "(" + a + ", " + Op1 + ", " + Op2 + ");"; + return s; +} + +// Gen combine operation by putting a on low 64-bit, and b on high 64-bit. +static std::string GenCombine(std::string typestr, + const std::string &a, + const std::string &b) { + std::string s; + s = MangleName("vcombine", typestr, ClassS); + s += "(" + a + ", " + b + ")"; + return s; +} + static std::string Duplicate(unsigned nElts, StringRef typestr, const std::string &a) { std::string s; @@ -1368,18 +1474,30 @@ static std::string GenOpString(const std::string &name, OpKind op, case OpAddl: s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";"; break; + case OpAddlHi: + s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";"; + break; case OpAddw: s += "__a + " + Extend(typestr, "__b") + ";"; break; + case OpAddwHi: + s += "__a + " + Extend(typestr, "__b", 1) + ";"; + break; case OpSub: s += "__a - __b;"; break; case OpSubl: s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";"; break; + case OpSublHi: + s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";"; + break; case OpSubw: s += "__a - " + Extend(typestr, "__b") + ";"; break; + case OpSubwHi: + s += "__a - " + Extend(typestr, "__b", 1) + ";"; + break; case OpMulN: s += "__a * " + Duplicate(nElts, typestr, "__b") + ";"; break; @@ -1413,6 +1531,12 @@ static std::string GenOpString(const std::string &name, OpKind op, case OpMlal: s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; break; + case OpMullHi: + s += Gen2OpWith2High(typestr, "vmull", "__a", "__b"); + break; + case OpMlalHi: + s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c"); + break; case OpMlsN: s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");"; break; @@ -1433,6 +1557,9 @@ static std::string GenOpString(const std::string &name, OpKind op, case OpMlsl: s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; break; + case OpMlslHi: + s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c"); + break; case OpQDMullLane: s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + SplatLane(nElts, "__b", "__c") + ");"; @@ -1560,23 +1687,51 @@ static std::string GenOpString(const std::string &name, OpKind op, } break; } + case OpAbdlHi: + s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b"); + break; + case OpAddhnHi: { + std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)"; + s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn); + s += ";"; + break; + } + case OpRAddhnHi: { + std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)"; + s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn); + s += ";"; + break; + } + case OpSubhnHi: { + std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)"; + s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn); + s += ";"; + break; + } + case OpRSubhnHi: { + std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)"; + s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn); + s += ";"; + break; + } case OpAba: s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);"; break; - case OpAbal: { - s += "__a + "; - std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)"; - 
if (typestr[0] != 'U') { - // vabd results are always unsigned and must be zero-extended. - std::string utype = "U" + typestr.str(); - s += "(" + TypeString(proto[0], typestr) + ")"; - abd = "(" + TypeString('d', utype) + ")" + abd; - s += Extend(utype, abd) + ";"; - } else { - s += Extend(typestr, abd) + ";"; - } + case OpAbal: + s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);"; + break; + case OpAbalHi: + s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c"); + break; + case OpQDMullHi: + s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b"); + break; + case OpQDMlalHi: + s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c"); + break; + case OpQDMlslHi: + s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c"); break; - } case OpDiv: s += "__a / __b;"; break; @@ -1993,6 +2148,7 @@ void NeonEmitter::run(raw_ostream &OS) { emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap); emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap); emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap); + emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap); // ARM intrinsics must be emitted before AArch64 intrinsics to ensure // common intrinsics appear only once in the output stream. @@ -2014,6 +2170,10 @@ void NeonEmitter::run(raw_ostream &OS) { // Emit AArch64-specific intrinsics. OS << "#ifdef __aarch64__\n"; + emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap); + emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap); + emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap); + for (unsigned i = 0, e = RV.size(); i != e; ++i) { Record *R = RV[i]; |
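For reference, the new OP_*Hi cases do not add any new builtins; GenOpString composes each high-half intrinsic out of existing intrinsics (vmovl_high, vget_high, vcombine) through the Extend, Gen2OpWith2High/Gen3OpWith2High, and GenCombine helpers above. A rough sketch of what the generated arm_neon.h bodies amount to for two of the ops, assuming the usual ACLE types; the real output uses the header's inline-attribute macro and __-prefixed parameter names, and the "_sketch" suffixes here are only illustrative:

#include <arm_neon.h>

/* OP_ADDLHi: widen both high halves with vmovl_high, then add
   (clang allows infix arithmetic on NEON vector types). */
static inline int32x4_t vaddl_high_s16_sketch(int16x8_t a, int16x8_t b) {
  return vmovl_high_s16(a) + vmovl_high_s16(b);
}

/* OP_ADDHNHi: compute the narrowing add of the two wide operands, then
   vcombine it into the high half of the existing narrow result. */
static inline int16x8_t vaddhn_high_s32_sketch(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vcombine_s16(r, vaddhn_s32(a, b));
}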